From 6a13a4e5ed10aac3b80e996f5c9e94d000f484b2 Mon Sep 17 00:00:00 2001 From: Jordan Woyak Date: Wed, 20 Feb 2013 20:20:53 -0600 Subject: [PATCH 01/26] Add template "swap" function to CommonFuncs. --- Source/Core/Common/Src/CommonFuncs.h | 34 ++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/Source/Core/Common/Src/CommonFuncs.h b/Source/Core/Common/Src/CommonFuncs.h index 585fe0999a..18dc40e8c8 100644 --- a/Source/Core/Common/Src/CommonFuncs.h +++ b/Source/Core/Common/Src/CommonFuncs.h @@ -162,6 +162,40 @@ inline u16 swap16(const u8* _pData) {return swap16(*(const u16*)_pData);} inline u32 swap32(const u8* _pData) {return swap32(*(const u32*)_pData);} inline u64 swap64(const u8* _pData) {return swap64(*(const u64*)_pData);} +template +void swap(u8*); + +template <> +inline void swap<1>(u8* data) +{} + +template <> +inline void swap<2>(u8* data) +{ + *reinterpret_cast(data) = swap16(data); +} + +template <> +inline void swap<4>(u8* data) +{ + *reinterpret_cast(data) = swap32(data); +} + +template <> +inline void swap<8>(u8* data) +{ + *reinterpret_cast(data) = swap64(data); +} + +template +inline T FromBigEndian(T data) +{ + //static_assert(std::is_arithmetic::value, "function only makes sense with arithmetic types"); + + swap(reinterpret_cast(&data)); + return data; +} + } // Namespace Common #endif // _COMMONFUNCS_H_ From 8f256237a38e97db13d15ec8a761243755587714 Mon Sep 17 00:00:00 2001 From: Jordan Woyak Date: Wed, 20 Feb 2013 20:21:25 -0600 Subject: [PATCH 02/26] Templatify DataReader a bit. 
--- Source/Core/VideoCommon/Src/DataReader.h | 91 ++++++++---------------- 1 file changed, 31 insertions(+), 60 deletions(-) diff --git a/Source/Core/VideoCommon/Src/DataReader.h b/Source/Core/VideoCommon/Src/DataReader.h index 06668f8bbc..00dca77134 100644 --- a/Source/Core/VideoCommon/Src/DataReader.h +++ b/Source/Core/VideoCommon/Src/DataReader.h @@ -31,43 +31,63 @@ __forceinline void DataSkip(u32 skip) g_pVideoData += skip; } +// probably unnecessary +template +__forceinline void DataSkip() +{ + g_pVideoData += count; +} + +template +__forceinline T DataPeek(int _uOffset) +{ + auto const result = Common::FromBigEndian(*reinterpret_cast(g_pVideoData + _uOffset)); + return result; +} + +// TODO: kill these __forceinline u8 DataPeek8(int _uOffset) { - return g_pVideoData[_uOffset]; + return DataPeek(_uOffset); } __forceinline u16 DataPeek16(int _uOffset) { - return Common::swap16(*(u16*)&g_pVideoData[_uOffset]); + return DataPeek(_uOffset); } __forceinline u32 DataPeek32(int _uOffset) { - return Common::swap32(*(u32*)&g_pVideoData[_uOffset]); + return DataPeek(_uOffset); } +template +__forceinline T DataRead() +{ + auto const result = DataPeek(0); + DataSkip(); + return result; +} + +// TODO: kill these __forceinline u8 DataReadU8() { - return *g_pVideoData++; + return DataRead(); } __forceinline s8 DataReadS8() { - return (s8)(*g_pVideoData++); + return DataRead(); } __forceinline u16 DataReadU16() { - u16 tmp = Common::swap16(*(u16*)g_pVideoData); - g_pVideoData += 2; - return tmp; + return DataRead(); } __forceinline u32 DataReadU32() { - u32 tmp = Common::swap32(*(u32*)g_pVideoData); - g_pVideoData += 4; - return tmp; + return DataRead(); } typedef void (*DataReadU32xNfunc)(u32 *buf); @@ -120,55 +140,6 @@ __forceinline u32 DataReadU32Unswapped() return tmp; } -template -__forceinline T DataRead() -{ - T tmp = *(T*)g_pVideoData; - g_pVideoData += sizeof(T); - return tmp; -} - -template <> -__forceinline u16 DataRead() -{ - u16 tmp = 
Common::swap16(*(u16*)g_pVideoData); - g_pVideoData += 2; - return tmp; -} - -template <> -__forceinline s16 DataRead() -{ - s16 tmp = (s16)Common::swap16(*(u16*)g_pVideoData); - g_pVideoData += 2; - return tmp; -} - -template <> -__forceinline u32 DataRead() -{ - u32 tmp = (u32)Common::swap32(*(u32*)g_pVideoData); - g_pVideoData += 4; - return tmp; -} - -template <> -__forceinline s32 DataRead() -{ - s32 tmp = (s32)Common::swap32(*(u32*)g_pVideoData); - g_pVideoData += 4; - return tmp; -} - -__forceinline float DataReadF32() -{ - union {u32 i; float f;} temp; - temp.i = Common::swap32(*(u32*)g_pVideoData); - g_pVideoData += 4; - float tmp = temp.f; - return tmp; -} - __forceinline u8* DataGetPosition() { return g_pVideoData; From 5e78f8a176ceda4255164713ff62c08adfded2ca Mon Sep 17 00:00:00 2001 From: Jordan Woyak Date: Wed, 20 Feb 2013 20:43:53 -0600 Subject: [PATCH 03/26] Templatify TextCord Loader functions. --- .../Src/VertexLoader_TextCoord.cpp | 356 ++---------------- 1 file changed, 40 insertions(+), 316 deletions(-) diff --git a/Source/Core/VideoCommon/Src/VertexLoader_TextCoord.cpp b/Source/Core/VideoCommon/Src/VertexLoader_TextCoord.cpp index ba3bb73f43..5db2242d1b 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader_TextCoord.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader_TextCoord.cpp @@ -39,275 +39,36 @@ void LOADERDECL TexCoord_Read_Dummy() tcIndex++; } -void LOADERDECL TexCoord_ReadDirect_UByte1() +template +void LOADERDECL TexCoord_ReadDirect() { - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)DataReadU8() * tcScale[tcIndex]; - LOG_TEX1(); - VertexManager::s_pCurBufferPointer += 4; - tcIndex++; -} -void LOADERDECL TexCoord_ReadDirect_UByte2() -{ - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)DataReadU8() * tcScale[tcIndex]; - ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)DataReadU8() * tcScale[tcIndex]; - LOG_TEX2(); - VertexManager::s_pCurBufferPointer += 8; - tcIndex++; + 
reinterpret_cast(VertexManager::s_pCurBufferPointer)[0] = DataRead() * tcScale[tcIndex]; + if (N >= 1) + reinterpret_cast(VertexManager::s_pCurBufferPointer)[1] = DataRead() * tcScale[tcIndex]; + //LOG_TEX1(); + //LOG_TEX2(); + VertexManager::s_pCurBufferPointer += sizeof(float) * N; + ++tcIndex; } -void LOADERDECL TexCoord_ReadDirect_Byte1() +template +void LOADERDECL TexCoord_ReadIndex() { - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)DataReadU8() * tcScale[tcIndex]; - LOG_TEX1(); - VertexManager::s_pCurBufferPointer += 4; - tcIndex++; -} -void LOADERDECL TexCoord_ReadDirect_Byte2() -{ - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)DataReadU8() * tcScale[tcIndex]; - ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s8)DataReadU8() * tcScale[tcIndex]; - LOG_TEX2(); - VertexManager::s_pCurBufferPointer += 8; - tcIndex++; -} - -void LOADERDECL TexCoord_ReadDirect_UShort1() -{ - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)DataReadU16() * tcScale[tcIndex]; - LOG_TEX1(); - VertexManager::s_pCurBufferPointer += 4; - tcIndex++; -} -void LOADERDECL TexCoord_ReadDirect_UShort2() -{ - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)DataReadU16() * tcScale[tcIndex]; - ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)DataReadU16() * tcScale[tcIndex]; - LOG_TEX2(); - VertexManager::s_pCurBufferPointer += 8; - tcIndex++; -} - -void LOADERDECL TexCoord_ReadDirect_Short1() -{ - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)DataReadU16() * tcScale[tcIndex]; - LOG_TEX1(); - VertexManager::s_pCurBufferPointer += 4; - tcIndex++; -} -void LOADERDECL TexCoord_ReadDirect_Short2() -{ - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)DataReadU16() * tcScale[tcIndex]; - ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s16)DataReadU16() * tcScale[tcIndex]; - LOG_TEX2(); - VertexManager::s_pCurBufferPointer += 8; - tcIndex++; -} - -void LOADERDECL TexCoord_ReadDirect_Float1() -{ - 
((u32*)VertexManager::s_pCurBufferPointer)[0] = DataReadU32(); - LOG_TEX1(); - VertexManager::s_pCurBufferPointer += 4; - tcIndex++; -} -void LOADERDECL TexCoord_ReadDirect_Float2() -{ - ((u32*)VertexManager::s_pCurBufferPointer)[0] = DataReadU32(); - ((u32*)VertexManager::s_pCurBufferPointer)[1] = DataReadU32(); - LOG_TEX2(); - VertexManager::s_pCurBufferPointer += 8; - tcIndex++; -} - -// ================================================================================== -void LOADERDECL TexCoord_ReadIndex8_UByte1() -{ - u8 Index = DataReadU8(); - const u8 *pData = cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]); - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(*pData) * tcScale[tcIndex]; - LOG_TEX1(); - VertexManager::s_pCurBufferPointer += 4; - tcIndex++; -} -void LOADERDECL TexCoord_ReadIndex8_UByte2() -{ - u8 Index = DataReadU8(); - const u8 *pData = cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]); - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(u8)(pData[0]) * tcScale[tcIndex]; - ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(u8)(pData[1]) * tcScale[tcIndex]; - LOG_TEX2(); - VertexManager::s_pCurBufferPointer += 8; - tcIndex++; -} - -void LOADERDECL TexCoord_ReadIndex8_Byte1() -{ - u8 Index = DataReadU8(); - const u8 *pData = cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]); - - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)(*pData) * tcScale[tcIndex]; - LOG_TEX1(); - VertexManager::s_pCurBufferPointer += 4; - tcIndex++; -} -void LOADERDECL TexCoord_ReadIndex8_Byte2() -{ - u8 Index = DataReadU8(); - const u8 *pData = cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]); - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)(pData[0]) * tcScale[tcIndex]; - ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s8)(pData[1]) * 
tcScale[tcIndex]; - LOG_TEX2(); - VertexManager::s_pCurBufferPointer += 8; - tcIndex++; -} - -void LOADERDECL TexCoord_ReadIndex8_UShort1() -{ - u8 Index = DataReadU8(); - const u16 *pData = (const u16 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex])); - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(u16)Common::swap16(*pData) * tcScale[tcIndex]; - LOG_TEX1(); - VertexManager::s_pCurBufferPointer += 4; - tcIndex++; -} -void LOADERDECL TexCoord_ReadIndex8_UShort2() -{ - u8 Index = DataReadU8(); - const u16 *pData = (const u16 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex])); - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(u16)Common::swap16(pData[0]) * tcScale[tcIndex]; - ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(u16)Common::swap16(pData[1]) * tcScale[tcIndex]; - LOG_TEX2(); - VertexManager::s_pCurBufferPointer += 8; - tcIndex++; -} - -void LOADERDECL TexCoord_ReadIndex8_Short1() -{ - u8 Index = DataReadU8(); - const u16 *pData = (const u16 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex])); - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)Common::swap16(pData[0]) * tcScale[tcIndex]; - LOG_TEX1(); - VertexManager::s_pCurBufferPointer += 4; - tcIndex++; -} -void LOADERDECL TexCoord_ReadIndex8_Short2() -{ - u8 Index = DataReadU8(); - const u16 *pData = (const u16 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex])); - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)Common::swap16(pData[0]) * tcScale[tcIndex]; - ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s16)Common::swap16(pData[1]) * tcScale[tcIndex]; - LOG_TEX2(); - VertexManager::s_pCurBufferPointer += 8; - tcIndex++; -} - -void LOADERDECL TexCoord_ReadIndex8_Float1() -{ - u16 Index = DataReadU8(); - const u32 *pData = (const u32 
*)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex])); - ((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]); - LOG_TEX1(); - VertexManager::s_pCurBufferPointer += 4; - tcIndex++; -} -void LOADERDECL TexCoord_ReadIndex8_Float2() -{ - u16 Index = DataReadU8(); - const u32 *pData = (const u32 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex])); - ((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]); - ((u32*)VertexManager::s_pCurBufferPointer)[1] = Common::swap32(pData[1]); - LOG_TEX2(); - VertexManager::s_pCurBufferPointer += 8; - tcIndex++; -} - -// ================================================================================== -void LOADERDECL TexCoord_ReadIndex16_UByte1() -{ - u16 Index = DataReadU16(); - const u8 *pData = cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]); - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(u8)(pData[0]) * tcScale[tcIndex]; - LOG_TEX1(); - VertexManager::s_pCurBufferPointer += 4; - tcIndex++; -} -void LOADERDECL TexCoord_ReadIndex16_UByte2() -{ - u16 Index = DataReadU16(); - const u8 *pData = cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]); - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(u8)(pData[0]) * tcScale[tcIndex]; - ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(u8)(pData[1]) * tcScale[tcIndex]; - LOG_TEX2(); - VertexManager::s_pCurBufferPointer += 8; - tcIndex++; -} - -void LOADERDECL TexCoord_ReadIndex16_Byte1() -{ - u16 Index = DataReadU16(); - const u8 *pData = cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]); - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)(pData[0]) * tcScale[tcIndex]; - LOG_TEX1(); - VertexManager::s_pCurBufferPointer += 4; - tcIndex++; -} -void LOADERDECL TexCoord_ReadIndex16_Byte2() -{ - u16 
Index = DataReadU16(); - const u8 *pData = cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]); - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)(pData[0]) * tcScale[tcIndex]; - ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s8)(pData[1]) * tcScale[tcIndex]; - LOG_TEX2(); - VertexManager::s_pCurBufferPointer += 8; - tcIndex++; -} - -void LOADERDECL TexCoord_ReadIndex16_UShort1() -{ - u16 Index = DataReadU16(); - const u16* pData = (const u16 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex])); - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(u16)Common::swap16(pData[0]) * tcScale[tcIndex]; - LOG_TEX1(); - VertexManager::s_pCurBufferPointer += 4; - tcIndex++; -} -void LOADERDECL TexCoord_ReadIndex16_UShort2() -{ - u16 Index = DataReadU16(); - const u16* pData = (const u16 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex])); - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(u16)Common::swap16(pData[0]) * tcScale[tcIndex]; - ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(u16)Common::swap16(pData[1]) * tcScale[tcIndex]; - LOG_TEX2(); - VertexManager::s_pCurBufferPointer += 8; - tcIndex++; -} - -void LOADERDECL TexCoord_ReadIndex16_Short1() -{ - u16 Index = DataReadU16(); - const u16 *pData = (const u16 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex])); - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)Common::swap16(*pData) * tcScale[tcIndex]; - LOG_TEX1(); - VertexManager::s_pCurBufferPointer += 4; - tcIndex++; -} - -void LOADERDECL TexCoord_ReadIndex16_Short2() -{ - // Heavy in ZWW - u16 Index = DataReadU16(); - const u16 *pData = (const u16 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex])); - ((float*)VertexManager::s_pCurBufferPointer)[0] = 
(float)(s16)Common::swap16(pData[0]) * tcScale[tcIndex]; - ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s16)Common::swap16(pData[1]) * tcScale[tcIndex]; - LOG_TEX2(); - VertexManager::s_pCurBufferPointer += 8; - tcIndex++; + auto const index = DataRead(); + auto const data = reinterpret_cast(cached_arraybases[ARRAY_TEXCOORD0 + tcIndex] + (index * arraystrides[ARRAY_TEXCOORD0 + tcIndex])); + reinterpret_cast(VertexManager::s_pCurBufferPointer)[0] = Common::FromBigEndian(data[0]) * tcScale[tcIndex]; + if (N >= 1) + reinterpret_cast(VertexManager::s_pCurBufferPointer)[1] = Common::FromBigEndian(data[1]) * tcScale[tcIndex]; + //LOG_TEX1(); + //LOG_TEX2(); + VertexManager::s_pCurBufferPointer += sizeof(float) * N; + ++tcIndex; } #if _M_SSE >= 0x401 static const __m128i kMaskSwap16_2 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFFFFL, 0x02030001L); -void LOADERDECL TexCoord_ReadIndex16_Short2_SSE4() +void LOADERDECL TexCoord_ReadIndex16_Short2_SSE4() { // Heavy in ZWW u16 Index = DataReadU16(); @@ -325,27 +86,6 @@ void LOADERDECL TexCoord_ReadIndex16_Short2_SSE4() } #endif -void LOADERDECL TexCoord_ReadIndex16_Float1() -{ - u16 Index = DataReadU16(); - const u32 *pData = (const u32 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex])); - ((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]); - LOG_TEX1(); - VertexManager::s_pCurBufferPointer += 4; - tcIndex++; -} - -void LOADERDECL TexCoord_ReadIndex16_Float2() -{ - u16 Index = DataReadU16(); - const u32 *pData = (const u32 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex])); - ((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]); - ((u32*)VertexManager::s_pCurBufferPointer)[1] = Common::swap32(pData[1]); - LOG_TEX2(); - VertexManager::s_pCurBufferPointer += 8; - tcIndex++; -} - #if _M_SSE >= 0x301 static const __m128i kMaskSwap32 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 
0x04050607L, 0x00010203L); @@ -373,56 +113,40 @@ static TPipelineFunction tableReadTexCoord[4][8][2] = { {NULL, NULL,}, }, { - {TexCoord_ReadDirect_UByte1, TexCoord_ReadDirect_UByte2,}, - {TexCoord_ReadDirect_Byte1, TexCoord_ReadDirect_Byte2,}, - {TexCoord_ReadDirect_UShort1, TexCoord_ReadDirect_UShort2,}, - {TexCoord_ReadDirect_Short1, TexCoord_ReadDirect_Short2,}, - {TexCoord_ReadDirect_Float1, TexCoord_ReadDirect_Float2,}, + {TexCoord_ReadDirect, TexCoord_ReadDirect,}, + {TexCoord_ReadDirect, TexCoord_ReadDirect,}, + {TexCoord_ReadDirect, TexCoord_ReadDirect,}, + {TexCoord_ReadDirect, TexCoord_ReadDirect,}, + {TexCoord_ReadDirect, TexCoord_ReadDirect,}, }, { - {TexCoord_ReadIndex8_UByte1, TexCoord_ReadIndex8_UByte2,}, - {TexCoord_ReadIndex8_Byte1, TexCoord_ReadIndex8_Byte2,}, - {TexCoord_ReadIndex8_UShort1, TexCoord_ReadIndex8_UShort2,}, - {TexCoord_ReadIndex8_Short1, TexCoord_ReadIndex8_Short2,}, - {TexCoord_ReadIndex8_Float1, TexCoord_ReadIndex8_Float2,}, + {TexCoord_ReadIndex, TexCoord_ReadIndex,}, + {TexCoord_ReadIndex, TexCoord_ReadIndex,}, + {TexCoord_ReadIndex, TexCoord_ReadIndex,}, + {TexCoord_ReadIndex, TexCoord_ReadIndex,}, + {TexCoord_ReadIndex, TexCoord_ReadIndex,}, }, { - {TexCoord_ReadIndex16_UByte1, TexCoord_ReadIndex16_UByte2,}, - {TexCoord_ReadIndex16_Byte1, TexCoord_ReadIndex16_Byte2,}, - {TexCoord_ReadIndex16_UShort1, TexCoord_ReadIndex16_UShort2,}, - {TexCoord_ReadIndex16_Short1, TexCoord_ReadIndex16_Short2,}, - {TexCoord_ReadIndex16_Float1, TexCoord_ReadIndex16_Float2,}, + {TexCoord_ReadIndex, TexCoord_ReadIndex,}, + {TexCoord_ReadIndex, TexCoord_ReadIndex,}, + {TexCoord_ReadIndex, TexCoord_ReadIndex,}, + {TexCoord_ReadIndex, TexCoord_ReadIndex,}, + {TexCoord_ReadIndex, TexCoord_ReadIndex,}, }, }; static int tableReadTexCoordVertexSize[4][8][2] = { { - {0, 0,}, - {0, 0,}, - {0, 0,}, - {0, 0,}, - {0, 0,}, + {0, 0,}, {0, 0,}, {0, 0,}, {0, 0,}, {0, 0,}, }, { - {1, 2,}, - {1, 2,}, - {2, 4,}, - {2, 4,}, - {4, 8,}, + {1, 2,}, {1, 2,}, {2, 4,}, {2, 
4,}, {4, 8,}, }, { - {1, 1,}, - {1, 1,}, - {1, 1,}, - {1, 1,}, - {1, 1,}, + {1, 1,}, {1, 1,}, {1, 1,}, {1, 1,}, {1, 1,}, }, { - {2, 2,}, - {2, 2,}, - {2, 2,}, - {2, 2,}, - {2, 2,}, + {2, 2,}, {2, 2,}, {2, 2,}, {2, 2,}, {2, 2,}, }, }; From 5b63472ad7ec1dc93fc9efac39e55e636f5f0fc0 Mon Sep 17 00:00:00 2001 From: Jordan Woyak Date: Wed, 20 Feb 2013 22:22:41 -0600 Subject: [PATCH 04/26] Templatify Normal Loader functions. --- .../VideoCommon/Src/VertexLoader_Normal.cpp | 499 +++++------------- .../VideoCommon/Src/VertexLoader_Normal.h | 30 -- .../Src/VertexLoader_TextCoord.cpp | 2 + 3 files changed, 132 insertions(+), 399 deletions(-) diff --git a/Source/Core/VideoCommon/Src/VertexLoader_Normal.cpp b/Source/Core/VideoCommon/Src/VertexLoader_Normal.cpp index 830bd3de13..a53425b1ec 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader_Normal.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader_Normal.cpp @@ -22,6 +22,7 @@ #include "VertexManagerBase.h" #include "CPUDetect.h" #include +#include #if _M_SSE >= 0x401 #include @@ -34,74 +35,143 @@ VertexLoader_Normal::Set VertexLoader_Normal::m_Table[NUM_NRM_TYPE][NUM_NRM_INDICES][NUM_NRM_ELEMENTS][NUM_NRM_FORMAT]; +namespace +{ + +template +float FracAdjust(T val) +{ + //auto const S8FRAC = 1.f / (1u << 6); + //auto const U8FRAC = 1.f / (1u << 7); + //auto const S16FRAC = 1.f / (1u << 14); + //auto const U16FRAC = 1.f / (1u << 15); + + return val / float(1u << (sizeof(T) * 8 - std::numeric_limits::is_signed - 1)); +} + +template <> +float FracAdjust(float val) +{ return val; } + +template +inline void ReadIndirect(const T* data) +{ + static_assert(3 == N || 9 == N, "N is only sane as 3 or 9!"); + + auto const dest = reinterpret_cast(VertexManager::s_pCurBufferPointer); + + for (int i = 0; i != N; ++i) + { + dest[i] = FracAdjust(Common::FromBigEndian(data[i])); + LOG_NORM(); + } + + VertexManager::s_pCurBufferPointer += sizeof(float) * N; +} + +template +void LOADERDECL Normal_Direct() +{ + auto const source = 
reinterpret_cast(DataGetPosition()); + ReadIndirect(source); + DataSkip(); +} + +template +inline void Normal_Index_Offset() +{ + static_assert(!std::numeric_limits::is_signed, "Only unsigned I is sane!"); + + auto const index = DataRead(); + auto const data = reinterpret_cast(cached_arraybases[ARRAY_NORMAL] + + (index * arraystrides[ARRAY_NORMAL]) + sizeof(T) * 3 * Offset); + ReadIndirect(data); +} + +template +void LOADERDECL Normal_Index() +{ + Normal_Index_Offset(); +} + +template +void LOADERDECL Normal_Index_Indices3() +{ + Normal_Index_Offset(); + Normal_Index_Offset(); + Normal_Index_Offset(); +} + +} + void VertexLoader_Normal::Init(void) { // HACK is for signed instead of unsigned to prevent crashes. - m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Set(3, Normal_DirectByte); //HACK - m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Set(3, Normal_DirectByte); - m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Set(6, Normal_DirectShort); //HACK - m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Set(6, Normal_DirectShort); - m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Set(12, Normal_DirectFloat); - m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Set(9, Normal_DirectByte3); //HACK - m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Set(9, Normal_DirectByte3); - m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Set(18, Normal_DirectShort3); //HACK - m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Set(18, Normal_DirectShort3); - m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Set(36, Normal_DirectFloat3); + m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Set(3, Normal_Direct); //HACK + m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Set(3, Normal_Direct); + m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Set(6, Normal_Direct); //HACK + m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = 
Set(6, Normal_Direct); + m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Set(12, Normal_Direct); + m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Set(9, Normal_Direct); //HACK + m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Set(9, Normal_Direct); + m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Set(18, Normal_Direct); //HACK + m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Set(18, Normal_Direct); + m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Set(36, Normal_Direct); - m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Set(3, Normal_DirectByte); //HACK - m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Set(3, Normal_DirectByte); - m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Set(6, Normal_DirectShort); //HACK - m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Set(6, Normal_DirectShort); - m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Set(12, Normal_DirectFloat); - m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Set(9, Normal_DirectByte3); //HACK - m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Set(9, Normal_DirectByte3); - m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Set(18, Normal_DirectShort3); //HACK - m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Set(18, Normal_DirectShort3); - m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Set(36, Normal_DirectFloat3); + m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Set(3, Normal_Direct); //HACK + m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Set(3, Normal_Direct); + m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Set(6, Normal_Direct); //HACK + m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Set(6, Normal_Direct); + m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Set(12, Normal_Direct); + m_Table[NRM_DIRECT] 
[NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Set(9, Normal_Direct); //HACK + m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Set(9, Normal_Direct); + m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Set(18, Normal_Direct); //HACK + m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Set(18, Normal_Direct); + m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Set(36, Normal_Direct); - m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Set(1, Normal_Index8_Byte); //HACK - m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Set(1, Normal_Index8_Byte); - m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Set(1, Normal_Index8_Short); //HACK - m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Set(1, Normal_Index8_Short); - m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Set(1, Normal_Index8_Float); - m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Set(1, Normal_Index8_Byte3_Indices1); //HACK - m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Set(1, Normal_Index8_Byte3_Indices1); - m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Set(1, Normal_Index8_Short3_Indices1); //HACK - m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Set(1, Normal_Index8_Short3_Indices1); - m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Set(1, Normal_Index8_Float3_Indices1); + m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Set(1, Normal_Index); //HACK + m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Set(1, Normal_Index); + m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Set(1, Normal_Index); //HACK + m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Set(1, Normal_Index); + m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Set(1, Normal_Index); + m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Set(1, Normal_Index); //HACK + m_Table[NRM_INDEX8] 
[NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Set(1, Normal_Index); + m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Set(1, Normal_Index); //HACK + m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Set(1, Normal_Index); + m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Set(1, Normal_Index); - m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Set(1, Normal_Index8_Byte); //HACK - m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Set(1, Normal_Index8_Byte); - m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Set(1, Normal_Index8_Short); //HACK - m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Set(1, Normal_Index8_Short); - m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Set(1, Normal_Index8_Float); - m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Set(3, Normal_Index8_Byte3_Indices3); //HACK - m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Set(3, Normal_Index8_Byte3_Indices3); - m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Set(3, Normal_Index8_Short3_Indices3); //HACK - m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Set(3, Normal_Index8_Short3_Indices3); - m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Set(3, Normal_Index8_Float3_Indices3); + m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Set(1, Normal_Index); //HACK + m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Set(1, Normal_Index); + m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Set(1, Normal_Index); //HACK + m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Set(1, Normal_Index); + m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Set(1, Normal_Index); + m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Set(3, Normal_Index_Indices3); //HACK + m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Set(3, Normal_Index_Indices3); + m_Table[NRM_INDEX8] 
[NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Set(3, Normal_Index_Indices3); //HACK + m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Set(3, Normal_Index_Indices3); + m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Set(3, Normal_Index_Indices3); - m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Set(2, Normal_Index16_Byte); //HACK - m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Set(2, Normal_Index16_Byte); - m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Set(2, Normal_Index16_Short); //HACK - m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Set(2, Normal_Index16_Short); - m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Set(2, Normal_Index16_Float); - m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Set(2, Normal_Index16_Byte3_Indices1); //HACK - m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Set(2, Normal_Index16_Byte3_Indices1); - m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Set(2, Normal_Index16_Short3_Indices1); //HACK - m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Set(2, Normal_Index16_Short3_Indices1); - m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Set(2, Normal_Index16_Float3_Indices1); + m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Set(2, Normal_Index); //HACK + m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Set(2, Normal_Index); + m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Set(2, Normal_Index); //HACK + m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Set(2, Normal_Index); + m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Set(2, Normal_Index); + m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Set(2, Normal_Index); //HACK + m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Set(2, Normal_Index); + m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Set(2, Normal_Index); //HACK + 
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Set(2, Normal_Index); + m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Set(2, Normal_Index); - m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Set(2, Normal_Index16_Byte); //HACK - m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Set(2, Normal_Index16_Byte); - m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Set(2, Normal_Index16_Short); //HACK - m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Set(2, Normal_Index16_Short); - m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Set(2, Normal_Index16_Float); - m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Set(6, Normal_Index16_Byte3_Indices3); //HACK - m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Set(6, Normal_Index16_Byte3_Indices3); - m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Set(6, Normal_Index16_Short3_Indices3); //HACK - m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Set(6, Normal_Index16_Short3_Indices3); - m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Set(6, Normal_Index16_Float3_Indices3); + m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Set(2, Normal_Index); //HACK + m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Set(2, Normal_Index); + m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Set(2, Normal_Index); //HACK + m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Set(2, Normal_Index); + m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Set(2, Normal_Index); + m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Set(6, Normal_Index_Indices3); //HACK + m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Set(6, Normal_Index_Indices3); + m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Set(6, Normal_Index_Indices3); //HACK + m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Set(6, 
Normal_Index_Indices3); + m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Set(6, Normal_Index_Indices3); } unsigned int VertexLoader_Normal::GetSize(unsigned int _type, @@ -116,312 +186,3 @@ TPipelineFunction VertexLoader_Normal::GetFunction(unsigned int _type, TPipelineFunction pFunc = m_Table[_type][_index3][_elements][_format].function; return pFunc; } - -// This fracs are fixed acording to format -#define S8FRAC 0.015625f; // 1.0f / (1U << 6) -#define S16FRAC 0.00006103515625f; // 1.0f / (1U << 14) -// --- Direct --- - -inline void ReadIndirectS8x3(const s8* pData) -{ - ((float*)VertexManager::s_pCurBufferPointer)[0] = pData[0] * S8FRAC; - ((float*)VertexManager::s_pCurBufferPointer)[1] = pData[1] * S8FRAC; - ((float*)VertexManager::s_pCurBufferPointer)[2] = pData[2] * S8FRAC; - VertexManager::s_pCurBufferPointer += 12; - LOG_NORM(); -} - -inline void ReadIndirectS8x9(const s8* pData) -{ - ((float*)VertexManager::s_pCurBufferPointer)[0] = pData[0] * S8FRAC; - ((float*)VertexManager::s_pCurBufferPointer)[1] = pData[1] * S8FRAC; - ((float*)VertexManager::s_pCurBufferPointer)[2] = pData[2] * S8FRAC; - LOG_NORM(); - ((float*)VertexManager::s_pCurBufferPointer)[3] = pData[3] * S8FRAC; - ((float*)VertexManager::s_pCurBufferPointer)[4] = pData[4] * S8FRAC; - ((float*)VertexManager::s_pCurBufferPointer)[5] = pData[5] * S8FRAC; - LOG_NORM(); - ((float*)VertexManager::s_pCurBufferPointer)[6] = pData[6] * S8FRAC; - ((float*)VertexManager::s_pCurBufferPointer)[7] = pData[7] * S8FRAC; - ((float*)VertexManager::s_pCurBufferPointer)[8] = pData[8] * S8FRAC; - LOG_NORM(); - VertexManager::s_pCurBufferPointer += 36; -} - -inline void ReadIndirectS16x3(const u16* pData) -{ - ((float*)VertexManager::s_pCurBufferPointer)[0] = ((s16)Common::swap16(pData[0])) * S16FRAC; - ((float*)VertexManager::s_pCurBufferPointer)[1] = ((s16)Common::swap16(pData[1])) * S16FRAC; - ((float*)VertexManager::s_pCurBufferPointer)[2] = ((s16)Common::swap16(pData[2])) * S16FRAC; - 
VertexManager::s_pCurBufferPointer += 12; - LOG_NORM() -} - -inline void ReadIndirectS16x9(const u16* pData) -{ - ((float*)VertexManager::s_pCurBufferPointer)[0] = ((s16)Common::swap16(pData[0])) * S16FRAC; - ((float*)VertexManager::s_pCurBufferPointer)[1] = ((s16)Common::swap16(pData[1])) * S16FRAC; - ((float*)VertexManager::s_pCurBufferPointer)[2] = ((s16)Common::swap16(pData[2])) * S16FRAC; - LOG_NORM() - ((float*)VertexManager::s_pCurBufferPointer)[3] = ((s16)Common::swap16(pData[3])) * S16FRAC; - ((float*)VertexManager::s_pCurBufferPointer)[4] = ((s16)Common::swap16(pData[4])) * S16FRAC; - ((float*)VertexManager::s_pCurBufferPointer)[5] = ((s16)Common::swap16(pData[5])) * S16FRAC; - LOG_NORM() - ((float*)VertexManager::s_pCurBufferPointer)[6] = ((s16)Common::swap16(pData[6])) * S16FRAC; - ((float*)VertexManager::s_pCurBufferPointer)[7] = ((s16)Common::swap16(pData[7])) * S16FRAC; - ((float*)VertexManager::s_pCurBufferPointer)[8] = ((s16)Common::swap16(pData[8])) * S16FRAC; - LOG_NORM() - VertexManager::s_pCurBufferPointer += 36; -} - -inline void ReadIndirectFloatx3(const u32* pData) -{ - ((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]); - ((u32*)VertexManager::s_pCurBufferPointer)[1] = Common::swap32(pData[1]); - ((u32*)VertexManager::s_pCurBufferPointer)[2] = Common::swap32(pData[2]); - VertexManager::s_pCurBufferPointer += 12; - LOG_NORM(); -} - -inline void ReadIndirectFloatx9(const u32* pData) -{ - ((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]); - ((u32*)VertexManager::s_pCurBufferPointer)[1] = Common::swap32(pData[1]); - ((u32*)VertexManager::s_pCurBufferPointer)[2] = Common::swap32(pData[2]); - LOG_NORM(); - ((u32*)VertexManager::s_pCurBufferPointer)[3] = Common::swap32(pData[3]); - ((u32*)VertexManager::s_pCurBufferPointer)[4] = Common::swap32(pData[4]); - ((u32*)VertexManager::s_pCurBufferPointer)[5] = Common::swap32(pData[5]); - LOG_NORM(); - ((u32*)VertexManager::s_pCurBufferPointer)[6] = 
Common::swap32(pData[6]); - ((u32*)VertexManager::s_pCurBufferPointer)[7] = Common::swap32(pData[7]); - ((u32*)VertexManager::s_pCurBufferPointer)[8] = Common::swap32(pData[8]); - LOG_NORM(); - VertexManager::s_pCurBufferPointer += 36; -} - -inline void ReadDirectS8x3() -{ - const s8* Source = (const s8*)DataGetPosition(); - ReadIndirectS8x3(Source); - DataSkip(3); -} - -inline void ReadDirectS8x9() -{ - const s8* Source = (const s8*)DataGetPosition(); - ReadIndirectS8x9(Source); - DataSkip(9); -} - -inline void ReadDirectS16x3() -{ - const u16* Source = (const u16*)DataGetPosition(); - ReadIndirectS16x3(Source); - DataSkip(6); -} - -inline void ReadDirectS16x9() -{ - const u16* Source = (const u16*)DataGetPosition(); - ReadIndirectS16x9(Source); - DataSkip(18); -} - -inline void ReadDirectFloatx3() -{ - const u32* Source = (const u32*)DataGetPosition(); - ReadIndirectFloatx3(Source); - DataSkip(12); -} - -inline void ReadDirectFloatx9() -{ - const u32* Source = (const u32*)DataGetPosition(); - ReadIndirectFloatx9(Source); - DataSkip(36); -} - - - -void LOADERDECL VertexLoader_Normal::Normal_DirectByte() -{ - ReadDirectS8x3(); -} - -void LOADERDECL VertexLoader_Normal::Normal_DirectShort() -{ - ReadDirectS16x3(); -} - -void LOADERDECL VertexLoader_Normal::Normal_DirectFloat() -{ - ReadDirectFloatx3(); -} - -void LOADERDECL VertexLoader_Normal::Normal_DirectByte3() -{ - ReadDirectS8x9(); -} - -void LOADERDECL VertexLoader_Normal::Normal_DirectShort3() -{ - ReadDirectS16x9(); -} - -void LOADERDECL VertexLoader_Normal::Normal_DirectFloat3() -{ - ReadDirectFloatx9(); -} - - -// --- Index8 --- - -void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte() -{ - u8 Index = DataReadU8(); - const s8* pData = (const s8 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL])); - ReadIndirectS8x3(pData); -} - -void LOADERDECL VertexLoader_Normal::Normal_Index8_Short() -{ - u8 Index = DataReadU8(); - const u16* pData = (const u16 
*)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL])); - ReadIndirectS16x3(pData); -} - -void LOADERDECL VertexLoader_Normal::Normal_Index8_Float() -{ - u8 Index = DataReadU8(); - const u32* pData = (const u32 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL])); - ReadIndirectFloatx3(pData); -} - -void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte3_Indices1() -{ - u8 Index = DataReadU8(); - const s8* pData = (const s8 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL])); - ReadIndirectS8x9(pData); -} - -void LOADERDECL VertexLoader_Normal::Normal_Index8_Short3_Indices1() -{ - u8 Index = DataReadU8(); - const u16* pData = (const u16 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL])); - ReadIndirectS16x9(pData); -} - -void LOADERDECL VertexLoader_Normal::Normal_Index8_Float3_Indices1() -{ - u8 Index = DataReadU8(); - const u32* pData = (const u32 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL])); - ReadIndirectFloatx9(pData); -} - -void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte3_Indices3() -{ - for (int i = 0; i < 3; i++) - { - u8 Index = DataReadU8(); - const s8* pData = (const s8*)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 1*3*i); - ReadIndirectS8x3(pData); - } -} - - -void LOADERDECL VertexLoader_Normal::Normal_Index8_Short3_Indices3() -{ - for (int i = 0; i < 3; i++) - { - u8 Index = DataReadU8(); - const u16* pData = (const u16 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 2*3*i); - ReadIndirectS16x3(pData); - } -} - -void LOADERDECL VertexLoader_Normal::Normal_Index8_Float3_Indices3() -{ - for (int i = 0; i < 3; i++) - { - u8 Index = DataReadU8(); - const u32* pData = (const u32 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 4*3*i); - ReadIndirectFloatx3(pData); - } -} - - -// --- Index16 --- - - -void LOADERDECL 
VertexLoader_Normal::Normal_Index16_Byte() -{ - u16 Index = DataReadU16(); - const s8* pData = (const s8 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL])); - ReadIndirectS8x3(pData); -} - -void LOADERDECL VertexLoader_Normal::Normal_Index16_Short() -{ - u16 Index = DataReadU16(); - const u16* pData = (const u16 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL])); - ReadIndirectS16x3(pData); -} - -void LOADERDECL VertexLoader_Normal::Normal_Index16_Float() -{ - u16 Index = DataReadU16(); - const u32* pData = (const u32 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL])); - ReadIndirectFloatx3(pData); -} - -void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte3_Indices1() -{ - u16 Index = DataReadU16(); - const s8* pData = (const s8 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL])); - ReadIndirectS8x9(pData); -} - -void LOADERDECL VertexLoader_Normal::Normal_Index16_Short3_Indices1() -{ - u16 Index = DataReadU16(); - const u16* pData = (const u16 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL])); - ReadIndirectS16x9(pData); -} - -void LOADERDECL VertexLoader_Normal::Normal_Index16_Float3_Indices1() -{ - u16 Index = DataReadU16(); - const u32* pData = (const u32 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL])); - ReadIndirectFloatx9(pData); -} - -void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte3_Indices3() -{ - for (int i = 0; i < 3; i++) - { - u16 Index = DataReadU16(); - const s8* pData = (const s8*)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 1*3*i); - ReadIndirectS8x3(pData); - } -} - -void LOADERDECL VertexLoader_Normal::Normal_Index16_Short3_Indices3() -{ - for (int i = 0; i < 3; i++) - { - u16 Index = DataReadU16(); - const u16* pData = (const u16 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 2*3*i); - ReadIndirectS16x3(pData); - } -} - -void 
LOADERDECL VertexLoader_Normal::Normal_Index16_Float3_Indices3() -{ - for (int i = 0; i < 3; i++) - { - u16 Index = DataReadU16(); - const u32* pData = (const u32 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 4*3*i); - ReadIndirectFloatx3(pData); - } -} diff --git a/Source/Core/VideoCommon/Src/VertexLoader_Normal.h b/Source/Core/VideoCommon/Src/VertexLoader_Normal.h index 934cd1ec43..71d4387ff0 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader_Normal.h +++ b/Source/Core/VideoCommon/Src/VertexLoader_Normal.h @@ -79,36 +79,6 @@ private: }; static Set m_Table[NUM_NRM_TYPE][NUM_NRM_INDICES][NUM_NRM_ELEMENTS][NUM_NRM_FORMAT]; - - // direct - static void LOADERDECL Normal_DirectByte(); - static void LOADERDECL Normal_DirectShort(); - static void LOADERDECL Normal_DirectFloat(); - static void LOADERDECL Normal_DirectByte3(); - static void LOADERDECL Normal_DirectShort3(); - static void LOADERDECL Normal_DirectFloat3(); - - // index8 - static void LOADERDECL Normal_Index8_Byte(); - static void LOADERDECL Normal_Index8_Short(); - static void LOADERDECL Normal_Index8_Float(); - static void LOADERDECL Normal_Index8_Byte3_Indices1(); - static void LOADERDECL Normal_Index8_Short3_Indices1(); - static void LOADERDECL Normal_Index8_Float3_Indices1(); - static void LOADERDECL Normal_Index8_Byte3_Indices3(); - static void LOADERDECL Normal_Index8_Short3_Indices3(); - static void LOADERDECL Normal_Index8_Float3_Indices3(); - - // index16 - static void LOADERDECL Normal_Index16_Byte(); - static void LOADERDECL Normal_Index16_Short(); - static void LOADERDECL Normal_Index16_Float(); - static void LOADERDECL Normal_Index16_Byte3_Indices1(); - static void LOADERDECL Normal_Index16_Short3_Indices1(); - static void LOADERDECL Normal_Index16_Float3_Indices1(); - static void LOADERDECL Normal_Index16_Byte3_Indices3(); - static void LOADERDECL Normal_Index16_Short3_Indices3(); - static void LOADERDECL Normal_Index16_Float3_Indices3(); }; #endif diff --git 
a/Source/Core/VideoCommon/Src/VertexLoader_TextCoord.cpp b/Source/Core/VideoCommon/Src/VertexLoader_TextCoord.cpp index 5db2242d1b..8bbd1197d5 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader_TextCoord.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader_TextCoord.cpp @@ -54,6 +54,8 @@ void LOADERDECL TexCoord_ReadDirect() template void LOADERDECL TexCoord_ReadIndex() { + static_assert(!std::numeric_limits::is_signed, "Only unsigned I is sane!"); + auto const index = DataRead(); auto const data = reinterpret_cast(cached_arraybases[ARRAY_TEXCOORD0 + tcIndex] + (index * arraystrides[ARRAY_TEXCOORD0 + tcIndex])); reinterpret_cast(VertexManager::s_pCurBufferPointer)[0] = Common::FromBigEndian(data[0]) * tcScale[tcIndex]; From 3938d3713b23d264e78a19b5bc1cb751630ee9ba Mon Sep 17 00:00:00 2001 From: Jordan Woyak Date: Thu, 21 Feb 2013 00:40:22 -0600 Subject: [PATCH 05/26] Fix what I broke. --- .../Src/VertexLoader_TextCoord.cpp | 54 ++++++++++++++----- 1 file changed, 41 insertions(+), 13 deletions(-) diff --git a/Source/Core/VideoCommon/Src/VertexLoader_TextCoord.cpp b/Source/Core/VideoCommon/Src/VertexLoader_TextCoord.cpp index 8bbd1197d5..b5e7d890ff 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader_TextCoord.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader_TextCoord.cpp @@ -28,8 +28,20 @@ #include #endif -#define LOG_TEX1() // PRIM_LOG("tex: %f, ", ((float*)VertexManager::s_pCurBufferPointer)[0]); -#define LOG_TEX2() // PRIM_LOG("tex: %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[0], ((float*)VertexManager::s_pCurBufferPointer)[1]); +template +void LOG_TEX(); + +template <> +__forceinline void LOG_TEX<1>() +{ + // PRIM_LOG("tex: %f, ", ((float*)VertexManager::s_pCurBufferPointer)[0]); +} + +template <> +__forceinline void LOG_TEX<2>() +{ + // PRIM_LOG("tex: %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[0], ((float*)VertexManager::s_pCurBufferPointer)[1]); +} extern int tcIndex; extern float tcScale[8]; @@ -39,14 +51,26 @@ void LOADERDECL 
TexCoord_Read_Dummy() tcIndex++; } +template +float TCScale(T val) +{ + return val * tcScale[tcIndex]; +} + +template <> +float TCScale(float val) +{ return val; } + template void LOADERDECL TexCoord_ReadDirect() { - reinterpret_cast(VertexManager::s_pCurBufferPointer)[0] = DataRead() * tcScale[tcIndex]; - if (N >= 1) - reinterpret_cast(VertexManager::s_pCurBufferPointer)[1] = DataRead() * tcScale[tcIndex]; - //LOG_TEX1(); - //LOG_TEX2(); + auto const dest = reinterpret_cast(VertexManager::s_pCurBufferPointer); + + for (int i = 0; i != N; ++i) + dest[i] = TCScale(DataRead()); + + LOG_TEX(); + VertexManager::s_pCurBufferPointer += sizeof(float) * N; ++tcIndex; } @@ -57,12 +81,16 @@ void LOADERDECL TexCoord_ReadIndex() static_assert(!std::numeric_limits::is_signed, "Only unsigned I is sane!"); auto const index = DataRead(); - auto const data = reinterpret_cast(cached_arraybases[ARRAY_TEXCOORD0 + tcIndex] + (index * arraystrides[ARRAY_TEXCOORD0 + tcIndex])); - reinterpret_cast(VertexManager::s_pCurBufferPointer)[0] = Common::FromBigEndian(data[0]) * tcScale[tcIndex]; - if (N >= 1) - reinterpret_cast(VertexManager::s_pCurBufferPointer)[1] = Common::FromBigEndian(data[1]) * tcScale[tcIndex]; - //LOG_TEX1(); - //LOG_TEX2(); + auto const data = reinterpret_cast(cached_arraybases[ARRAY_TEXCOORD0 + tcIndex] + + (index * arraystrides[ARRAY_TEXCOORD0 + tcIndex])); + + auto const dest = reinterpret_cast(VertexManager::s_pCurBufferPointer); + + for (int i = 0; i != N; ++i) + dest[i] = TCScale(Common::FromBigEndian(data[i])); + + LOG_TEX(); + VertexManager::s_pCurBufferPointer += sizeof(float) * N; ++tcIndex; } From 5efdcef34ad06a94c36a7cf7c1e84bbaee26d215 Mon Sep 17 00:00:00 2001 From: Jordan Woyak Date: Thu, 21 Feb 2013 00:49:47 -0600 Subject: [PATCH 06/26] No longer have the signed/unsigned hack I guess. 
--- .../VideoCommon/Src/VertexLoader_Normal.cpp | 50 +++++++++---------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/Source/Core/VideoCommon/Src/VertexLoader_Normal.cpp b/Source/Core/VideoCommon/Src/VertexLoader_Normal.cpp index a53425b1ec..34a3190eff 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader_Normal.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader_Normal.cpp @@ -46,6 +46,7 @@ float FracAdjust(T val) //auto const S16FRAC = 1.f / (1u << 14); //auto const U16FRAC = 1.f / (1u << 15); + // TODO: is this right? return val / float(1u << (sizeof(T) * 8 - std::numeric_limits::is_signed - 1)); } @@ -106,70 +107,69 @@ void LOADERDECL Normal_Index_Indices3() void VertexLoader_Normal::Init(void) { - // HACK is for signed instead of unsigned to prevent crashes. - m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Set(3, Normal_Direct); //HACK + m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Set(3, Normal_Direct); m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Set(3, Normal_Direct); - m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Set(6, Normal_Direct); //HACK + m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Set(6, Normal_Direct); m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Set(6, Normal_Direct); m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Set(12, Normal_Direct); - m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Set(9, Normal_Direct); //HACK + m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Set(9, Normal_Direct); m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Set(9, Normal_Direct); - m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Set(18, Normal_Direct); //HACK + m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Set(18, Normal_Direct); m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Set(18, Normal_Direct); m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = 
Set(36, Normal_Direct); - m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Set(3, Normal_Direct); //HACK + m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Set(3, Normal_Direct); m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Set(3, Normal_Direct); - m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Set(6, Normal_Direct); //HACK + m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Set(6, Normal_Direct); m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Set(6, Normal_Direct); m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Set(12, Normal_Direct); - m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Set(9, Normal_Direct); //HACK + m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Set(9, Normal_Direct); m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Set(9, Normal_Direct); - m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Set(18, Normal_Direct); //HACK + m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Set(18, Normal_Direct); m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Set(18, Normal_Direct); m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Set(36, Normal_Direct); - m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Set(1, Normal_Index); //HACK + m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Set(1, Normal_Index); m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Set(1, Normal_Index); - m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Set(1, Normal_Index); //HACK + m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Set(1, Normal_Index); m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Set(1, Normal_Index); m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Set(1, Normal_Index); - m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Set(1, Normal_Index); //HACK + m_Table[NRM_INDEX8] 
[NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Set(1, Normal_Index); m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Set(1, Normal_Index); - m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Set(1, Normal_Index); //HACK + m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Set(1, Normal_Index); m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Set(1, Normal_Index); m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Set(1, Normal_Index); - m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Set(1, Normal_Index); //HACK + m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Set(1, Normal_Index); m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Set(1, Normal_Index); - m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Set(1, Normal_Index); //HACK + m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Set(1, Normal_Index); m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Set(1, Normal_Index); m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Set(1, Normal_Index); - m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Set(3, Normal_Index_Indices3); //HACK + m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Set(3, Normal_Index_Indices3); m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Set(3, Normal_Index_Indices3); - m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Set(3, Normal_Index_Indices3); //HACK + m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Set(3, Normal_Index_Indices3); m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Set(3, Normal_Index_Indices3); m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Set(3, Normal_Index_Indices3); - m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Set(2, Normal_Index); //HACK + m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Set(2, Normal_Index); m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = 
Set(2, Normal_Index); - m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Set(2, Normal_Index); //HACK + m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Set(2, Normal_Index); m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Set(2, Normal_Index); m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Set(2, Normal_Index); - m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Set(2, Normal_Index); //HACK + m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Set(2, Normal_Index); m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Set(2, Normal_Index); - m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Set(2, Normal_Index); //HACK + m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Set(2, Normal_Index); m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Set(2, Normal_Index); m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Set(2, Normal_Index); - m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Set(2, Normal_Index); //HACK + m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Set(2, Normal_Index); m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Set(2, Normal_Index); - m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Set(2, Normal_Index); //HACK + m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Set(2, Normal_Index); m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Set(2, Normal_Index); m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Set(2, Normal_Index); - m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Set(6, Normal_Index_Indices3); //HACK + m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Set(6, Normal_Index_Indices3); m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Set(6, Normal_Index_Indices3); - m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Set(6, Normal_Index_Indices3); //HACK + 
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Set(6, Normal_Index_Indices3); m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Set(6, Normal_Index_Indices3); m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Set(6, Normal_Index_Indices3); } From 660fc129275dc92e4f6f99bb497f83b2da58992a Mon Sep 17 00:00:00 2001 From: Jordan Woyak Date: Thu, 21 Feb 2013 02:00:27 -0600 Subject: [PATCH 07/26] Cleanup Position loader functions. --- .../VideoCommon/Src/VertexLoader_Position.cpp | 266 +++++------------- 1 file changed, 69 insertions(+), 197 deletions(-) diff --git a/Source/Core/VideoCommon/Src/VertexLoader_Position.cpp b/Source/Core/VideoCommon/Src/VertexLoader_Position.cpp index 06481f9ddf..3fa74fb747 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader_Position.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader_Position.cpp @@ -15,6 +15,8 @@ // Official SVN repository and contact information can be found at // http://code.google.com/p/dolphin-emu/ +#include + #include "Common.h" #include "VideoCommon.h" #include "VertexLoader.h" @@ -71,101 +73,52 @@ MOVUPS(MOffset(EDI, 0), XMM0); */ -// ============================================================================== -// Direct -// ============================================================================== - -template -void Pos_ReadDirect() +template +float PosScale(T val) { - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(T)DataRead() * posScale; - ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(T)DataRead() * posScale; - if (three) - ((float*)VertexManager::s_pCurBufferPointer)[2] = (float)(T)DataRead() * posScale; - else - ((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f; + return val * posScale; +} + +template <> +float PosScale(float val) +{ return val; } + +template +LOADERDECL void Pos_ReadDirect() +{ + static_assert(N <= 3, "N > 3 is not sane!"); + + auto const dest = reinterpret_cast(VertexManager::s_pCurBufferPointer); + for (int i = 0; i != N; 
++i) + dest[i] = PosScale(DataRead()); + + for (int i = N; i != 3; ++i) + dest[i] = 0.f; + LOG_VTX(); - VertexManager::s_pCurBufferPointer += 12; + VertexManager::s_pCurBufferPointer += sizeof(float) * 3; } -void LOADERDECL Pos_ReadDirect_UByte3() { Pos_ReadDirect(); } -void LOADERDECL Pos_ReadDirect_Byte3() { Pos_ReadDirect(); } -void LOADERDECL Pos_ReadDirect_UShort3() { Pos_ReadDirect(); } -void LOADERDECL Pos_ReadDirect_Short3() { Pos_ReadDirect(); } -void LOADERDECL Pos_ReadDirect_UByte2() { Pos_ReadDirect(); } -void LOADERDECL Pos_ReadDirect_Byte2() { Pos_ReadDirect(); } -void LOADERDECL Pos_ReadDirect_UShort2() { Pos_ReadDirect(); } -void LOADERDECL Pos_ReadDirect_Short2() { Pos_ReadDirect(); } - -void LOADERDECL Pos_ReadDirect_Float3() +template +LOADERDECL void Pos_ReadIndex() { - // No need to use floating point here. - ((u32 *)VertexManager::s_pCurBufferPointer)[0] = DataReadU32(); - ((u32 *)VertexManager::s_pCurBufferPointer)[1] = DataReadU32(); - ((u32 *)VertexManager::s_pCurBufferPointer)[2] = DataReadU32(); - LOG_VTX(); - VertexManager::s_pCurBufferPointer += 12; -} - -void LOADERDECL Pos_ReadDirect_Float2() -{ - // No need to use floating point here. 
- ((u32 *)VertexManager::s_pCurBufferPointer)[0] = DataReadU32(); - ((u32 *)VertexManager::s_pCurBufferPointer)[1] = DataReadU32(); - ((u32 *)VertexManager::s_pCurBufferPointer)[2] = 0; - LOG_VTX(); - VertexManager::s_pCurBufferPointer += 12; -} - - -template -inline void Pos_ReadIndex_Byte(int Index) -{ - if(Index < MaxSize) + static_assert(!std::numeric_limits::is_signed, "Only unsigned I is sane!"); + static_assert(N <= 3, "N > 3 is not sane!"); + + auto const index = DataRead(); + if (index < std::numeric_limits::max()) { - const u8* pData = cached_arraybases[ARRAY_POSITION] + ((u32)Index * arraystrides[ARRAY_POSITION]); - ((float*)VertexManager::s_pCurBufferPointer)[0] = ((float)(T)(pData[0])) * posScale; - ((float*)VertexManager::s_pCurBufferPointer)[1] = ((float)(T)(pData[1])) * posScale; - if (three) - ((float*)VertexManager::s_pCurBufferPointer)[2] = ((float)(T)(pData[2])) * posScale; - else - ((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f; + auto const data = reinterpret_cast(cached_arraybases[ARRAY_POSITION] + (index * arraystrides[ARRAY_POSITION])); + auto const dest = reinterpret_cast(VertexManager::s_pCurBufferPointer); + + for (int i = 0; i != N; ++i) + dest[i] = PosScale(Common::FromBigEndian(data[i])); + + for (int i = N; i != 3; ++i) + dest[i] = 0.f + LOG_VTX(); - VertexManager::s_pCurBufferPointer += 12; - } -} - -template -inline void Pos_ReadIndex_Short(int Index) -{ - if(Index < MaxSize) - { - const u16* pData = (const u16 *)(cached_arraybases[ARRAY_POSITION] + ((u32)Index * arraystrides[ARRAY_POSITION])); - ((float*)VertexManager::s_pCurBufferPointer)[0] = ((float)(T)Common::swap16(pData[0])) * posScale; - ((float*)VertexManager::s_pCurBufferPointer)[1] = ((float)(T)Common::swap16(pData[1])) * posScale; - if (three) - ((float*)VertexManager::s_pCurBufferPointer)[2] = ((float)(T)Common::swap16(pData[2])) * posScale; - else - ((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f; - LOG_VTX(); - VertexManager::s_pCurBufferPointer += 12; 
- } -} - -template -void Pos_ReadIndex_Float(int Index) -{ - if(Index < MaxSize) - { - const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (Index * arraystrides[ARRAY_POSITION])); - ((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]); - ((u32*)VertexManager::s_pCurBufferPointer)[1] = Common::swap32(pData[1]); - if (three) - ((u32*)VertexManager::s_pCurBufferPointer)[2] = Common::swap32(pData[2]); - else - ((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f; - LOG_VTX(); - VertexManager::s_pCurBufferPointer += 12; + VertexManager::s_pCurBufferPointer += sizeof(float) * 3; } } @@ -173,87 +126,22 @@ void Pos_ReadIndex_Float(int Index) static const __m128i kMaskSwap32_3 = _mm_set_epi32(0xFFFFFFFFL, 0x08090A0BL, 0x04050607L, 0x00010203L); static const __m128i kMaskSwap32_2 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x04050607L, 0x00010203L); -template -void Pos_ReadIndex_Float_SSSE3(int Index) +template +LOADERDECL void Pos_ReadIndex_Float_SSSE3() { - if(Index < MaxSize) + auto const index = DataRead(); + if (index < std::numeric_limits::max()) { const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (Index * arraystrides[ARRAY_POSITION])); GC_ALIGNED128(const __m128i a = _mm_loadu_si128((__m128i*)pData)); GC_ALIGNED128(__m128i b = _mm_shuffle_epi8(a, three ? kMaskSwap32_3 : kMaskSwap32_2)); _mm_storeu_si128((__m128i*)VertexManager::s_pCurBufferPointer, b); LOG_VTX(); - VertexManager::s_pCurBufferPointer += 12; + VertexManager::s_pCurBufferPointer += sizeof(float) * 3; } } #endif -// Explicitly instantiate these functions to decrease the possibility of -// symbol binding problems when (only) calling them from JIT compiled code. 
-template void Pos_ReadDirect(); -template void Pos_ReadDirect(); -template void Pos_ReadDirect(); -template void Pos_ReadDirect(); -template void Pos_ReadDirect(); -template void Pos_ReadDirect(); -template void Pos_ReadDirect(); -template void Pos_ReadDirect(); -template void Pos_ReadIndex_Byte(int Index); -template void Pos_ReadIndex_Byte(int Index); -template void Pos_ReadIndex_Short(int Index); -template void Pos_ReadIndex_Short(int Index); -template void Pos_ReadIndex_Float(int Index); -template void Pos_ReadIndex_Byte(int Index); -template void Pos_ReadIndex_Byte(int Index); -template void Pos_ReadIndex_Short(int Index); -template void Pos_ReadIndex_Short(int Index); -template void Pos_ReadIndex_Float(int Index); -template void Pos_ReadIndex_Byte(int Index); -template void Pos_ReadIndex_Byte(int Index); -template void Pos_ReadIndex_Short(int Index); -template void Pos_ReadIndex_Short(int Index); -template void Pos_ReadIndex_Float(int Index); -template void Pos_ReadIndex_Byte(int Index); -template void Pos_ReadIndex_Byte(int Index); -template void Pos_ReadIndex_Short(int Index); -template void Pos_ReadIndex_Short(int Index); -template void Pos_ReadIndex_Float(int Index); - -// ============================================================================== -// Index 8 -// ============================================================================== -void LOADERDECL Pos_ReadIndex8_UByte3() {Pos_ReadIndex_Byte (DataReadU8());} -void LOADERDECL Pos_ReadIndex8_Byte3() {Pos_ReadIndex_Byte (DataReadU8());} -void LOADERDECL Pos_ReadIndex8_UShort3() {Pos_ReadIndex_Short (DataReadU8());} -void LOADERDECL Pos_ReadIndex8_Short3() {Pos_ReadIndex_Short (DataReadU8());} -void LOADERDECL Pos_ReadIndex8_Float3() {Pos_ReadIndex_Float (DataReadU8());} -void LOADERDECL Pos_ReadIndex8_UByte2() {Pos_ReadIndex_Byte(DataReadU8());} -void LOADERDECL Pos_ReadIndex8_Byte2() {Pos_ReadIndex_Byte(DataReadU8());} -void LOADERDECL Pos_ReadIndex8_UShort2() 
{Pos_ReadIndex_Short(DataReadU8());} -void LOADERDECL Pos_ReadIndex8_Short2() {Pos_ReadIndex_Short(DataReadU8());} -void LOADERDECL Pos_ReadIndex8_Float2() {Pos_ReadIndex_Float (DataReadU8());} - -// ============================================================================== -// Index 16 -// ============================================================================== -void LOADERDECL Pos_ReadIndex16_UByte3() {Pos_ReadIndex_Byte (DataReadU16());} -void LOADERDECL Pos_ReadIndex16_Byte3() {Pos_ReadIndex_Byte (DataReadU16());} -void LOADERDECL Pos_ReadIndex16_UShort3() {Pos_ReadIndex_Short (DataReadU16());} -void LOADERDECL Pos_ReadIndex16_Short3() {Pos_ReadIndex_Short (DataReadU16());} -void LOADERDECL Pos_ReadIndex16_Float3() {Pos_ReadIndex_Float (DataReadU16());} -void LOADERDECL Pos_ReadIndex16_UByte2() {Pos_ReadIndex_Byte(DataReadU16());} -void LOADERDECL Pos_ReadIndex16_Byte2() {Pos_ReadIndex_Byte(DataReadU16());} -void LOADERDECL Pos_ReadIndex16_UShort2() {Pos_ReadIndex_Short(DataReadU16());} -void LOADERDECL Pos_ReadIndex16_Short2() {Pos_ReadIndex_Short(DataReadU16());} -void LOADERDECL Pos_ReadIndex16_Float2() {Pos_ReadIndex_Float (DataReadU16());} - -#if _M_SSE >= 0x301 -void LOADERDECL Pos_ReadIndex8_Float3_SSSE3() {Pos_ReadIndex_Float_SSSE3 (DataReadU8());} -void LOADERDECL Pos_ReadIndex8_Float2_SSSE3() {Pos_ReadIndex_Float_SSSE3 (DataReadU8());} -void LOADERDECL Pos_ReadIndex16_Float3_SSSE3() {Pos_ReadIndex_Float_SSSE3 (DataReadU16());} -void LOADERDECL Pos_ReadIndex16_Float2_SSSE3() {Pos_ReadIndex_Float_SSSE3 (DataReadU16());} -#endif - static TPipelineFunction tableReadPosition[4][8][2] = { { {NULL, NULL,}, @@ -263,56 +151,40 @@ static TPipelineFunction tableReadPosition[4][8][2] = { {NULL, NULL,}, }, { - {Pos_ReadDirect_UByte2, Pos_ReadDirect_UByte3,}, - {Pos_ReadDirect_Byte2, Pos_ReadDirect_Byte3,}, - {Pos_ReadDirect_UShort2, Pos_ReadDirect_UShort3,}, - {Pos_ReadDirect_Short2, Pos_ReadDirect_Short3,}, - {Pos_ReadDirect_Float2, 
Pos_ReadDirect_Float3,}, + {Pos_ReadDirect, Pos_ReadDirect,}, + {Pos_ReadDirect, Pos_ReadDirect,}, + {Pos_ReadDirect, Pos_ReadDirect,}, + {Pos_ReadDirect, Pos_ReadDirect,}, + {Pos_ReadDirect, Pos_ReadDirect,}, }, { - {Pos_ReadIndex8_UByte2, Pos_ReadIndex8_UByte3,}, - {Pos_ReadIndex8_Byte2, Pos_ReadIndex8_Byte3,}, - {Pos_ReadIndex8_UShort2, Pos_ReadIndex8_UShort3,}, - {Pos_ReadIndex8_Short2, Pos_ReadIndex8_Short3,}, - {Pos_ReadIndex8_Float2, Pos_ReadIndex8_Float3,}, + {Pos_ReadIndex, Pos_ReadIndex,}, + {Pos_ReadIndex, Pos_ReadIndex,}, + {Pos_ReadIndex, Pos_ReadIndex,}, + {Pos_ReadIndex, Pos_ReadIndex,}, + {Pos_ReadIndex, Pos_ReadIndex,}, }, { - {Pos_ReadIndex16_UByte2, Pos_ReadIndex16_UByte3,}, - {Pos_ReadIndex16_Byte2, Pos_ReadIndex16_Byte3,}, - {Pos_ReadIndex16_UShort2, Pos_ReadIndex16_UShort3,}, - {Pos_ReadIndex16_Short2, Pos_ReadIndex16_Short3,}, - {Pos_ReadIndex16_Float2, Pos_ReadIndex16_Float3,}, + {Pos_ReadIndex, Pos_ReadIndex,}, + {Pos_ReadIndex, Pos_ReadIndex,}, + {Pos_ReadIndex, Pos_ReadIndex,}, + {Pos_ReadIndex, Pos_ReadIndex,}, + {Pos_ReadIndex, Pos_ReadIndex,}, }, }; static int tableReadPositionVertexSize[4][8][2] = { { - {0, 0,}, - {0, 0,}, - {0, 0,}, - {0, 0,}, - {0, 0,}, + {0, 0,}, {0, 0,}, {0, 0,}, {0, 0,}, {0, 0,}, }, { - {2, 3,}, - {2, 3,}, - {4, 6,}, - {4, 6,}, - {8, 12,}, + {2, 3,}, {2, 3,}, {4, 6,}, {4, 6,}, {8, 12,}, }, { - {1, 1,}, - {1, 1,}, - {1, 1,}, - {1, 1,}, - {1, 1,}, + {1, 1,}, {1, 1,}, {1, 1,}, {1, 1,}, {1, 1,}, }, { - {2, 2,}, - {2, 2,}, - {2, 2,}, - {2, 2,}, - {2, 2,}, + {2, 2,}, {2, 2,}, {2, 2,}, {2, 2,}, {2, 2,}, }, }; @@ -322,10 +194,10 @@ void VertexLoader_Position::Init(void) { #if _M_SSE >= 0x301 if (cpu_info.bSSSE3) { - tableReadPosition[2][4][0] = Pos_ReadIndex8_Float2_SSSE3; - tableReadPosition[2][4][1] = Pos_ReadIndex8_Float3_SSSE3; - tableReadPosition[3][4][0] = Pos_ReadIndex16_Float2_SSSE3; - tableReadPosition[3][4][1] = Pos_ReadIndex16_Float3_SSSE3; + tableReadPosition[2][4][0] = Pos_ReadIndex_Float_SSSE3; + 
tableReadPosition[2][4][1] = Pos_ReadIndex_Float_SSSE3; + tableReadPosition[3][4][0] = Pos_ReadIndex_Float_SSSE3; + tableReadPosition[3][4][1] = Pos_ReadIndex_Float_SSSE3; } #endif From dc776a19a01f730f2d21496a1403fbdda35cbc7e Mon Sep 17 00:00:00 2001 From: Jordan Woyak Date: Thu, 21 Feb 2013 02:49:17 -0600 Subject: [PATCH 08/26] Cleanup VertexLoader's Color functions a bit. --- .../VideoCommon/Src/VertexLoader_Color.cpp | 92 ++++++++----------- 1 file changed, 37 insertions(+), 55 deletions(-) diff --git a/Source/Core/VideoCommon/Src/VertexLoader_Color.cpp b/Source/Core/VideoCommon/Src/VertexLoader_Color.cpp index 9cfa5efc31..ce2c970fb9 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader_Color.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader_Color.cpp @@ -15,9 +15,6 @@ // Official SVN repository and contact information can be found at // http://code.google.com/p/dolphin-emu/ -#ifndef _VERTEXLOADERCOLOR_H -#define _VERTEXLOADERCOLOR_H - #include "Common.h" #include "VideoCommon.h" #include "LookUpTables.h" @@ -132,80 +129,65 @@ void LOADERDECL Color_ReadDirect_32b_8888() _SetCol(col); } - - -void LOADERDECL Color_ReadIndex8_16b_565() +template +void Color_ReadIndex_16b_565() { - u8 Index = DataReadU8(); + auto const Index = DataRead(); u16 val = Common::swap16(*(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]))); _SetCol565(val); } -void LOADERDECL Color_ReadIndex8_24b_888() + +template +void Color_ReadIndex_24b_888() { - u8 Index = DataReadU8(); + auto const Index = DataRead(); const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]); _SetCol(_Read24(iAddress)); } -void LOADERDECL Color_ReadIndex8_32b_888x() + +template +void Color_ReadIndex_32b_888x() { - u8 Index = DataReadU8(); + auto const Index = DataRead(); const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]); _SetCol(_Read24(iAddress)); } -void 
LOADERDECL Color_ReadIndex8_16b_4444() + +template +void Color_ReadIndex_16b_4444() { - u8 Index = DataReadU8(); + auto const Index = DataRead(); u16 val = *(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex])); _SetCol4444(val); } -void LOADERDECL Color_ReadIndex8_24b_6666() + +template +void Color_ReadIndex_24b_6666() { - u8 Index = DataReadU8(); + auto const Index = DataRead(); const u8* pData = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]) - 1; u32 val = Common::swap32(pData); _SetCol6666(val); } -void LOADERDECL Color_ReadIndex8_32b_8888() + +template +void Color_ReadIndex_32b_8888() { - u8 Index = DataReadU8(); + auto const Index = DataRead(); const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]); _SetCol(_Read32(iAddress)); } -void LOADERDECL Color_ReadIndex16_16b_565() -{ - u16 Index = DataReadU16(); - u16 val = Common::swap16(*(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]))); - _SetCol565(val); -} -void LOADERDECL Color_ReadIndex16_24b_888() -{ - u16 Index = DataReadU16(); - const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]); - _SetCol(_Read24(iAddress)); -} -void LOADERDECL Color_ReadIndex16_32b_888x() -{ - u16 Index = DataReadU16(); - const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]); - _SetCol(_Read24(iAddress)); -} -void LOADERDECL Color_ReadIndex16_16b_4444() -{ - u16 Index = DataReadU16(); - u16 val = *(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex])); - _SetCol4444(val); -} -void LOADERDECL Color_ReadIndex16_24b_6666() -{ - u16 Index = DataReadU16(); - const u8 *pData = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]) - 1; - u32 val = 
Common::swap32(pData); - _SetCol6666(val); -} -void LOADERDECL Color_ReadIndex16_32b_8888() -{ - u16 Index = DataReadU16(); - const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]); - _SetCol(_Read32(iAddress)); -} -#endif + +void LOADERDECL Color_ReadIndex8_16b_565() { Color_ReadIndex_16b_565(); } +void LOADERDECL Color_ReadIndex8_24b_888() { Color_ReadIndex_24b_888(); } +void LOADERDECL Color_ReadIndex8_32b_888x() { Color_ReadIndex_32b_888x(); } +void LOADERDECL Color_ReadIndex8_16b_4444() { Color_ReadIndex_16b_4444(); } +void LOADERDECL Color_ReadIndex8_24b_6666() { Color_ReadIndex_24b_6666(); } +void LOADERDECL Color_ReadIndex8_32b_8888() { Color_ReadIndex_32b_8888(); } + +void LOADERDECL Color_ReadIndex16_16b_565() { Color_ReadIndex_16b_565(); } +void LOADERDECL Color_ReadIndex16_24b_888() { Color_ReadIndex_24b_888(); } +void LOADERDECL Color_ReadIndex16_32b_888x() { Color_ReadIndex_32b_888x(); } +void LOADERDECL Color_ReadIndex16_16b_4444() { Color_ReadIndex_16b_4444(); } +void LOADERDECL Color_ReadIndex16_24b_6666() { Color_ReadIndex_24b_6666(); } +void LOADERDECL Color_ReadIndex16_32b_8888() { Color_ReadIndex_32b_8888(); } From 7a89faf13f1d02f9f5d2b01ffa33fe6e8b2c6d99 Mon Sep 17 00:00:00 2001 From: degasus Date: Thu, 21 Feb 2013 11:36:10 +0100 Subject: [PATCH 09/26] converting the last vertices again instead of copying from buffer on buffer split for mapping, this buffer must be write only, so we cannot copy anything. 
converting again needs more cpu, but should happen rarely --- Source/Core/VideoCommon/Src/VertexLoader.cpp | 63 +++++++++++--------- Source/Core/VideoCommon/Src/VertexLoader.h | 1 + 2 files changed, 36 insertions(+), 28 deletions(-) diff --git a/Source/Core/VideoCommon/Src/VertexLoader.cpp b/Source/Core/VideoCommon/Src/VertexLoader.cpp index 5bffab8ee7..7eb410fc43 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader.cpp @@ -571,7 +571,6 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count) if (remainingVerts < granularity) { INCSTAT(stats.thisFrame.numBufferSplits); // This buffer full - break current primitive and flush, to switch to the next buffer. - u8* plastptr = VertexManager::s_pCurBufferPointer; if (v - startv > 0) VertexManager::AddVertices(primitive, v - startv + extraverts); VertexManager::Flush(); @@ -581,27 +580,28 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count) case 3: // triangle strip, copy last two vertices // a little trick since we have to keep track of signs if (v & 1) { - memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-2*native_stride, native_stride); - memcpy_gc(VertexManager::s_pCurBufferPointer+native_stride, plastptr-native_stride*2, 2*native_stride); - VertexManager::s_pCurBufferPointer += native_stride*3; + g_pVideoData -= m_VertexSize*2; + ConvertVertices(1); + g_pVideoData -= m_VertexSize; + ConvertVertices(2); extraverts = 3; } else { - memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride*2, native_stride*2); - VertexManager::s_pCurBufferPointer += native_stride*2; + g_pVideoData -= m_VertexSize*2; + ConvertVertices(2); extraverts = 2; } break; case 4: // tri fan, copy first and last vert - memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride*(v-startv+extraverts), native_stride); - VertexManager::s_pCurBufferPointer += native_stride; - memcpy_gc(VertexManager::s_pCurBufferPointer, 
plastptr-native_stride, native_stride); - VertexManager::s_pCurBufferPointer += native_stride; + g_pVideoData -= m_VertexSize*(v-startv+extraverts); + ConvertVertices(1); + g_pVideoData += m_VertexSize*(v-startv+extraverts-2); + ConvertVertices(1); extraverts = 2; break; case 6: // line strip - memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride, native_stride); - VertexManager::s_pCurBufferPointer += native_stride; + g_pVideoData -= m_VertexSize*1; + ConvertVertices(1); extraverts = 1; break; default: @@ -615,22 +615,8 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count) if (count - v < remainingVerts) remainingVerts = count - v; - #ifdef USE_JIT - if (remainingVerts > 0) { - loop_counter = remainingVerts; - ((void (*)())(void*)m_compiledCode)(); - } - #else - for (int s = 0; s < remainingVerts; s++) - { - tcIndex = 0; - colIndex = 0; - s_texmtxwrite = s_texmtxread = 0; - for (int i = 0; i < m_numPipelineStages; i++) - m_PipelineStages[i](); - PRIM_LOG("\n"); - } - #endif + ConvertVertices(remainingVerts); + v += remainingVerts; } @@ -639,6 +625,27 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count) } +void VertexLoader::ConvertVertices ( int count ) +{ +#ifdef USE_JIT + if (count > 0) { + loop_counter = count; + ((void (*)())(void*)m_compiledCode)(); + } +#else + for (int s = 0; s < count; s++) + { + tcIndex = 0; + colIndex = 0; + s_texmtxwrite = s_texmtxread = 0; + for (int i = 0; i < m_numPipelineStages; i++) + m_PipelineStages[i](); + PRIM_LOG("\n"); + } +#endif +} + + void VertexLoader::RunCompiledVertices(int vtx_attr_group, int primitive, int count, u8* Data) diff --git a/Source/Core/VideoCommon/Src/VertexLoader.h b/Source/Core/VideoCommon/Src/VertexLoader.h index 98a57cb9ff..4f4fc19e99 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader.h +++ b/Source/Core/VideoCommon/Src/VertexLoader.h @@ -119,6 +119,7 @@ private: void SetVAT(u32 _group0, u32 _group1, u32 _group2); void 
CompileVertexTranslator(); + void ConvertVertices(int count); void WriteCall(TPipelineFunction); From 0d33e200260978ebeb159e84fbf6a792363b2574 Mon Sep 17 00:00:00 2001 From: degasus Date: Thu, 21 Feb 2013 11:41:14 +0100 Subject: [PATCH 10/26] bbox: replace s_pCurBufferPointer with local buffer, so it can be read without reading from the write-only buffer --- Source/Core/VideoCommon/Src/VertexLoader.cpp | 40 +++++++++++++++----- 1 file changed, 30 insertions(+), 10 deletions(-) diff --git a/Source/Core/VideoCommon/Src/VertexLoader.cpp b/Source/Core/VideoCommon/Src/VertexLoader.cpp index 7eb410fc43..23064dec9a 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader.cpp @@ -72,6 +72,10 @@ int colElements[2]; float posScale; float tcScale[8]; +// bbox must read vertex position, so convert it to this buffer +static float s_bbox_vertex_buffer[3]; +static u8 *s_bbox_pCurBufferPointer_orig; + static const float fractionTable[32] = { 1.0f / (1U << 0), 1.0f / (1U << 1), 1.0f / (1U << 2), 1.0f / (1U << 3), 1.0f / (1U << 4), 1.0f / (1U << 5), 1.0f / (1U << 6), 1.0f / (1U << 7), @@ -99,17 +103,32 @@ void LOADERDECL PosMtx_Write() *VertexManager::s_pCurBufferPointer++ = 0; } +void LOADERDECL UpdateBoundingBoxPrepare() +{ + if (!PixelEngine::bbox_active) + return; + + // set our buffer as videodata buffer, so we will get a copy of the vertex positions + // this is a big hack, but so we can use the same converting function then without bbox + s_bbox_pCurBufferPointer_orig = VertexManager::s_pCurBufferPointer; + VertexManager::s_pCurBufferPointer = (u8*)s_bbox_vertex_buffer; +} + void LOADERDECL UpdateBoundingBox() { if (!PixelEngine::bbox_active) return; + + // reset videodata pointer + VertexManager::s_pCurBufferPointer = s_bbox_pCurBufferPointer_orig; + + // copy vertex pointers + memcpy(VertexManager::s_pCurBufferPointer, s_bbox_vertex_buffer, 12); + VertexManager::s_pCurBufferPointer += 12; - // Truly evil hack, reading backwards 
from the write pointer. If we were writing to write-only - // memory like we might have been with a D3D vertex buffer, this would have been a bad idea. - float *data = (float *)(VertexManager::s_pCurBufferPointer - 12); // We must transform the just loaded point by the current world and projection matrix - in software. // Then convert to screen space and update the bounding box. - float p[3] = {data[0], data[1], data[2]}; + float p[3] = {s_bbox_vertex_buffer[0], s_bbox_vertex_buffer[1], s_bbox_vertex_buffer[2]}; const float *world_matrix = (float*)xfmem + MatrixIndexA.PosNormalMtxIdx * 4; const float *proj_matrix = &g_fProjectionMatrix[0]; @@ -267,15 +286,16 @@ void VertexLoader::CompileVertexTranslator() if (m_VtxDesc.Tex7MatIdx) {m_VertexSize += 1; m_NativeFmt->m_components |= VB_HAS_TEXMTXIDX7; WriteCall(TexMtx_ReadDirect_UByte); } // Write vertex position loader - WriteCall(VertexLoader_Position::GetFunction(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements)); + if(g_ActiveConfig.bUseBBox) { + WriteCall(UpdateBoundingBoxPrepare); + WriteCall(VertexLoader_Position::GetFunction(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements)); + WriteCall(UpdateBoundingBox); + } else { + WriteCall(VertexLoader_Position::GetFunction(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements)); + } m_VertexSize += VertexLoader_Position::GetSize(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements); nat_offset += 12; - // OK, so we just got a point. Let's go back and read it for the bounding box. 
- - if(g_ActiveConfig.bUseBBox) - WriteCall(UpdateBoundingBox); - // Normals vtx_decl.num_normals = 0; if (m_VtxDesc.Normal != NOT_PRESENT) From 66d0c1c301c1a40414a0135d94525dab5198a8eb Mon Sep 17 00:00:00 2001 From: degasus Date: Thu, 21 Feb 2013 11:45:29 +0100 Subject: [PATCH 11/26] small cleanups of s_pCurBufferPointer --- Source/Core/VideoCommon/Src/DLCache.cpp | 3 +-- Source/Core/VideoCommon/Src/VertexLoader_Normal.cpp | 1 + Source/Core/VideoCommon/Src/VertexLoader_TextCoord.cpp | 7 +++---- Source/Core/VideoCommon/Src/VideoCommon.h | 2 +- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/Source/Core/VideoCommon/Src/DLCache.cpp b/Source/Core/VideoCommon/Src/DLCache.cpp index c828ea3a03..86d321114c 100644 --- a/Source/Core/VideoCommon/Src/DLCache.cpp +++ b/Source/Core/VideoCommon/Src/DLCache.cpp @@ -550,8 +550,7 @@ void CompileAndRunDisplayList(u32 address, u32 size, CachedDisplayList *dl) cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7) (cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT, numVertices); - u8* EndAddress = VertexManager::s_pCurBufferPointer; - u32 Vdatasize = (u32)(EndAddress - StartAddress); + u32 Vdatasize = (u32)(VertexManager::s_pCurBufferPointer - StartAddress); if (Vdatasize > 0) { // Compile diff --git a/Source/Core/VideoCommon/Src/VertexLoader_Normal.cpp b/Source/Core/VideoCommon/Src/VertexLoader_Normal.cpp index 830bd3de13..82d04f5ea2 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader_Normal.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader_Normal.cpp @@ -30,6 +30,7 @@ #include #endif +// warning: mapping buffer should be disabled to use this #define LOG_NORM() // PRIM_LOG("norm: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-3], ((float*)VertexManager::s_pCurBufferPointer)[-2], ((float*)VertexManager::s_pCurBufferPointer)[-1]); VertexLoader_Normal::Set VertexLoader_Normal::m_Table[NUM_NRM_TYPE][NUM_NRM_INDICES][NUM_NRM_ELEMENTS][NUM_NRM_FORMAT]; diff --git 
a/Source/Core/VideoCommon/Src/VertexLoader_TextCoord.cpp b/Source/Core/VideoCommon/Src/VertexLoader_TextCoord.cpp index ba3bb73f43..fd6f191ae3 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader_TextCoord.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader_TextCoord.cpp @@ -28,6 +28,7 @@ #include #endif +// warning: mapping buffer should be disabled to use this #define LOG_TEX1() // PRIM_LOG("tex: %f, ", ((float*)VertexManager::s_pCurBufferPointer)[0]); #define LOG_TEX2() // PRIM_LOG("tex: %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[0], ((float*)VertexManager::s_pCurBufferPointer)[1]); @@ -355,11 +356,9 @@ void LOADERDECL TexCoord_ReadIndex16_Float2_SSSE3() const u32 *pData = (const u32 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex])); GC_ALIGNED128(const __m128i a = _mm_loadl_epi64((__m128i*)pData)); GC_ALIGNED128(const __m128i b = _mm_shuffle_epi8(a, kMaskSwap32)); - u8* p = VertexManager::s_pCurBufferPointer; - _mm_storel_epi64((__m128i*)p, b); + _mm_storel_epi64((__m128i*)VertexManager::s_pCurBufferPointer, b); LOG_TEX2(); - p += 8; - VertexManager::s_pCurBufferPointer = p; + VertexManager::s_pCurBufferPointer += 8; tcIndex++; } #endif diff --git a/Source/Core/VideoCommon/Src/VideoCommon.h b/Source/Core/VideoCommon/Src/VideoCommon.h index 1eeed20b82..7681d8ba82 100644 --- a/Source/Core/VideoCommon/Src/VideoCommon.h +++ b/Source/Core/VideoCommon/Src/VideoCommon.h @@ -90,7 +90,7 @@ struct TargetRectangle : public MathUtil::Rectangle #define PRIM_LOG(...) 
DEBUG_LOG(VIDEO, ##__VA_ARGS__) #endif - +// warning: mapping buffer should be disabled to use this // #define LOG_VTX() DEBUG_LOG(VIDEO, "vtx: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[0], ((float*)VertexManager::s_pCurBufferPointer)[1], ((float*)VertexManager::s_pCurBufferPointer)[2]); #define LOG_VTX() From 76e6085e31920b2521074ea871efd6a4129b3614 Mon Sep 17 00:00:00 2001 From: degasus Date: Thu, 21 Feb 2013 12:36:29 +0100 Subject: [PATCH 12/26] consistent usage of buffer pointers --- Source/Core/VideoCommon/Src/VertexManagerBase.cpp | 10 ++++++---- Source/Core/VideoCommon/Src/VertexManagerBase.h | 8 +++++--- Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp | 6 +++--- Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp | 10 +++++----- Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp | 6 +++--- 5 files changed, 22 insertions(+), 18 deletions(-) diff --git a/Source/Core/VideoCommon/Src/VertexManagerBase.cpp b/Source/Core/VideoCommon/Src/VertexManagerBase.cpp index cd5a01c6b3..6fd1898db6 100644 --- a/Source/Core/VideoCommon/Src/VertexManagerBase.cpp +++ b/Source/Core/VideoCommon/Src/VertexManagerBase.cpp @@ -16,8 +16,9 @@ VertexManager *g_vertex_manager; -u8 *VertexManager::s_pCurBufferPointer; u8 *VertexManager::s_pBaseBufferPointer; +u8 *VertexManager::s_pCurBufferPointer; +u8 *VertexManager::s_pEndBufferPointer; u8 *VertexManager::LocalVBuffer; u16 *VertexManager::TIBuffer; @@ -32,6 +33,7 @@ VertexManager::VertexManager() LocalVBuffer = new u8[MAXVBUFFERSIZE]; s_pCurBufferPointer = s_pBaseBufferPointer = LocalVBuffer; + s_pEndBufferPointer = s_pBaseBufferPointer + MAXVBUFFERSIZE; TIBuffer = new u16[MAXIBUFFERSIZE]; LIBuffer = new u16[MAXIBUFFERSIZE]; @@ -42,7 +44,7 @@ VertexManager::VertexManager() void VertexManager::ResetBuffer() { - s_pCurBufferPointer = LocalVBuffer; + s_pCurBufferPointer = s_pBaseBufferPointer; } VertexManager::~VertexManager() @@ -87,7 +89,7 @@ void VertexManager::AddIndices(int primitive, int numVertices) int 
VertexManager::GetRemainingSize() { - return MAXVBUFFERSIZE - (int)(s_pCurBufferPointer - LocalVBuffer); + return (int)(s_pEndBufferPointer - s_pCurBufferPointer); } int VertexManager::GetRemainingVertices(int primitive) @@ -170,7 +172,7 @@ void VertexManager::Flush() #if (0) void VertexManager::Flush() { - if (LocalVBuffer == s_pCurBufferPointer || Flushed) + if (s_pBaseBufferPointer == s_pCurBufferPointer || Flushed) return; Flushed = true; diff --git a/Source/Core/VideoCommon/Src/VertexManagerBase.h b/Source/Core/VideoCommon/Src/VertexManagerBase.h index f3a4aa72e3..d4608630ca 100644 --- a/Source/Core/VideoCommon/Src/VertexManagerBase.h +++ b/Source/Core/VideoCommon/Src/VertexManagerBase.h @@ -30,8 +30,9 @@ public: static void AddVertices(int _primitive, int _numVertices); // TODO: protected? - static u8 *s_pCurBufferPointer; static u8 *s_pBaseBufferPointer; + static u8 *s_pCurBufferPointer; + static u8 *s_pEndBufferPointer; static int GetRemainingSize(); static int GetRemainingVertices(int primitive); @@ -43,7 +44,7 @@ public: static u16* GetTriangleIndexBuffer() { return TIBuffer; } static u16* GetLineIndexBuffer() { return LIBuffer; } static u16* GetPointIndexBuffer() { return PIBuffer; } - static u8* GetVertexBuffer() { return LocalVBuffer; } + static u8* GetVertexBuffer() { return s_pBaseBufferPointer; } static void DoState(PointerWrap& p); virtual void CreateDeviceObjects(){}; @@ -52,7 +53,6 @@ protected: // TODO: make private after Flush() is merged static void ResetBuffer(); - static u8 *LocalVBuffer; static u16 *TIBuffer; static u16 *LIBuffer; static u16 *PIBuffer; @@ -67,6 +67,8 @@ private: //virtual void Draw(u32 stride, bool alphapass) = 0; // temp virtual void vFlush() = 0; + + static u8 *LocalVBuffer; }; diff --git a/Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp b/Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp index ab8ed68654..b227a4d27b 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp +++ 
b/Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp @@ -104,7 +104,7 @@ void VertexManager::LoadBuffers() { D3D11_MAPPED_SUBRESOURCE map; - UINT vSize = UINT(s_pCurBufferPointer - LocalVBuffer); + UINT vSize = UINT(s_pCurBufferPointer - s_pBaseBufferPointer); D3D11_MAP MapType = D3D11_MAP_WRITE_NO_OVERWRITE; if (m_vertexBufferCursor + vSize >= VBUFFER_SIZE) { @@ -116,7 +116,7 @@ void VertexManager::LoadBuffers() D3D::context->Map(m_vertexBuffers[m_activeVertexBuffer], 0, MapType, 0, &map); - memcpy((u8*)map.pData + m_vertexBufferCursor, LocalVBuffer, vSize); + memcpy((u8*)map.pData + m_vertexBufferCursor, s_pBaseBufferPointer, vSize); D3D::context->Unmap(m_vertexBuffers[m_activeVertexBuffer], 0); m_vertexDrawOffset = m_vertexBufferCursor; m_vertexBufferCursor += vSize; @@ -211,7 +211,7 @@ void VertexManager::Draw(UINT stride) void VertexManager::vFlush() { - if (LocalVBuffer == s_pCurBufferPointer) return; + if (s_pBaseBufferPointer == s_pCurBufferPointer) return; if (Flushed) return; Flushed=true; VideoFifo_CheckEFBAccess(); diff --git a/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp b/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp index 661cf36e76..5ca6018fb1 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp @@ -173,7 +173,7 @@ void VertexManager::PrepareVBuffers(int stride) DestroyDeviceObjects(); return; } - memcpy(pVertices, LocalVBuffer, datasize); + memcpy(pVertices, s_pBaseBufferPointer, datasize); VBuffers[CurrentVBuffer]->Unlock(); LockMode = D3DLOCK_NOOVERWRITE; @@ -268,7 +268,7 @@ void VertexManager::DrawVA(int stride) 0, IndexGenerator::GetNumVerts(), IndexGenerator::GetNumTriangles(), TIBuffer, D3DFMT_INDEX16, - LocalVBuffer, + s_pBaseBufferPointer, stride))) { DumpBadShaders(); @@ -282,7 +282,7 @@ void VertexManager::DrawVA(int stride) 0, IndexGenerator::GetNumVerts(), IndexGenerator::GetNumLines(), LIBuffer, D3DFMT_INDEX16, - LocalVBuffer, + 
s_pBaseBufferPointer, stride))) { DumpBadShaders(); @@ -296,7 +296,7 @@ void VertexManager::DrawVA(int stride) 0, IndexGenerator::GetNumVerts(), IndexGenerator::GetNumPoints(), PIBuffer, D3DFMT_INDEX16, - LocalVBuffer, + s_pBaseBufferPointer, stride))) { DumpBadShaders(); @@ -307,7 +307,7 @@ void VertexManager::DrawVA(int stride) void VertexManager::vFlush() { - if (LocalVBuffer == s_pCurBufferPointer) return; + if (s_pBaseBufferPointer == s_pCurBufferPointer) return; if (Flushed) return; Flushed = true; VideoFifo_CheckEFBAccess(); diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp b/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp index 6ea10fc077..5a07f42662 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp @@ -100,7 +100,7 @@ void VertexManager::Draw() void VertexManager::vFlush() { - if (LocalVBuffer == s_pCurBufferPointer) return; + if (s_pBaseBufferPointer == s_pCurBufferPointer) return; if (Flushed) return; Flushed=true; VideoFifo_CheckEFBAccess(); @@ -135,7 +135,7 @@ void VertexManager::vFlush() (void)GL_REPORT_ERROR(); //glBindBuffer(GL_ARRAY_BUFFER, s_vboBuffers[s_nCurVBOIndex]); - //glBufferData(GL_ARRAY_BUFFER, s_pCurBufferPointer - LocalVBuffer, LocalVBuffer, GL_STREAM_DRAW); + //glBufferData(GL_ARRAY_BUFFER, s_pCurBufferPointer - s_pBaseBufferPointer, s_pBaseBufferPointer, GL_STREAM_DRAW); GL_REPORT_ERRORD(); // setup the pointers @@ -240,7 +240,7 @@ void VertexManager::vFlush() GFX_DEBUGGER_PAUSE_AT(NEXT_FLUSH, true); //s_nCurVBOIndex = (s_nCurVBOIndex + 1) % ARRAYSIZE(s_vboBuffers); - s_pCurBufferPointer = LocalVBuffer; + s_pCurBufferPointer = s_pBaseBufferPointer; IndexGenerator::Start(TIBuffer,LIBuffer,PIBuffer); #if defined(_DEBUG) || defined(DEBUGFAST) From 4b4dce1bd9bba3d1d12d23343d58c4040ae09c3a Mon Sep 17 00:00:00 2001 From: degasus Date: Thu, 21 Feb 2013 13:00:19 +0100 Subject: [PATCH 13/26] build fix --- 
Source/Core/VideoCommon/Src/VertexLoader_Position.cpp | 8 ++++---- Source/Core/VideoCommon/Src/VertexLoader_TextCoord.cpp | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Source/Core/VideoCommon/Src/VertexLoader_Position.cpp b/Source/Core/VideoCommon/Src/VertexLoader_Position.cpp index 3fa74fb747..14f2819260 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader_Position.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader_Position.cpp @@ -84,7 +84,7 @@ float PosScale(float val) { return val; } template -LOADERDECL void Pos_ReadDirect() +void LOADERDECL Pos_ReadDirect() { static_assert(N <= 3, "N > 3 is not sane!"); @@ -100,7 +100,7 @@ LOADERDECL void Pos_ReadDirect() } template -LOADERDECL void Pos_ReadIndex() +void LOADERDECL Pos_ReadIndex() { static_assert(!std::numeric_limits::is_signed, "Only unsigned I is sane!"); static_assert(N <= 3, "N > 3 is not sane!"); @@ -127,12 +127,12 @@ static const __m128i kMaskSwap32_3 = _mm_set_epi32(0xFFFFFFFFL, 0x08090A0BL, 0x0 static const __m128i kMaskSwap32_2 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x04050607L, 0x00010203L); template -LOADERDECL void Pos_ReadIndex_Float_SSSE3() +void LOADERDECL Pos_ReadIndex_Float_SSSE3() { auto const index = DataRead(); if (index < std::numeric_limits::max()) { - const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (Index * arraystrides[ARRAY_POSITION])); + const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (index * arraystrides[ARRAY_POSITION])); GC_ALIGNED128(const __m128i a = _mm_loadu_si128((__m128i*)pData)); GC_ALIGNED128(__m128i b = _mm_shuffle_epi8(a, three ? 
kMaskSwap32_3 : kMaskSwap32_2)); _mm_storeu_si128((__m128i*)VertexManager::s_pCurBufferPointer, b); diff --git a/Source/Core/VideoCommon/Src/VertexLoader_TextCoord.cpp b/Source/Core/VideoCommon/Src/VertexLoader_TextCoord.cpp index 33efe33bdf..52f9d26ad3 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader_TextCoord.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader_TextCoord.cpp @@ -112,7 +112,7 @@ void LOADERDECL TexCoord_ReadIndex16_Short2_SSE4() const __m128 e = _mm_load1_ps(&tcScale[tcIndex]); const __m128 f = _mm_mul_ps(d, e); _mm_storeu_ps((float*)VertexManager::s_pCurBufferPointer, f); - LOG_TEX2(); + LOG_TEX<2>(); VertexManager::s_pCurBufferPointer += 8; tcIndex++; } @@ -128,7 +128,7 @@ void LOADERDECL TexCoord_ReadIndex16_Float2_SSSE3() GC_ALIGNED128(const __m128i a = _mm_loadl_epi64((__m128i*)pData)); GC_ALIGNED128(const __m128i b = _mm_shuffle_epi8(a, kMaskSwap32)); _mm_storel_epi64((__m128i*)VertexManager::s_pCurBufferPointer, b); - LOG_TEX2(); + LOG_TEX<2>(); VertexManager::s_pCurBufferPointer += 8; tcIndex++; } From c7f4d6b9ac9860532b8c84d6fde5ba4b80dca08d Mon Sep 17 00:00:00 2001 From: degasus Date: Thu, 21 Feb 2013 13:45:48 +0100 Subject: [PATCH 14/26] wrapper for s_pCurBufferPointer --- Source/Core/VideoCommon/Src/DataReader.h | 9 +++++++++ .../VideoCommon/Src/VertexLoader_Color.cpp | 3 +-- .../VideoCommon/Src/VertexLoader_Normal.cpp | 7 ++----- .../VideoCommon/Src/VertexLoader_Position.cpp | 20 +++++-------------- .../Src/VertexLoader_TextCoord.cpp | 19 ++++++------------ Source/Core/VideoCommon/Src/VideoCommon.h | 2 +- 6 files changed, 24 insertions(+), 36 deletions(-) diff --git a/Source/Core/VideoCommon/Src/DataReader.h b/Source/Core/VideoCommon/Src/DataReader.h index 00dca77134..03061229c0 100644 --- a/Source/Core/VideoCommon/Src/DataReader.h +++ b/Source/Core/VideoCommon/Src/DataReader.h @@ -20,6 +20,8 @@ #ifndef _DATAREADER_H #define _DATAREADER_H +#include "VertexManagerBase.h" + extern u8* g_pVideoData; #if _M_SSE >= 0x301 && !(defined 
__GNUC__ && !defined __SSSE3__) @@ -145,4 +147,11 @@ __forceinline u8* DataGetPosition() return g_pVideoData; } +template +__forceinline void DataWrite(T data) +{ + *(T*)VertexManager::s_pCurBufferPointer = data; + VertexManager::s_pCurBufferPointer += sizeof(T); +} + #endif diff --git a/Source/Core/VideoCommon/Src/VertexLoader_Color.cpp b/Source/Core/VideoCommon/Src/VertexLoader_Color.cpp index ce2c970fb9..fa1ecbe973 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader_Color.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader_Color.cpp @@ -34,8 +34,7 @@ extern int colElements[2]; __forceinline void _SetCol(u32 val) { - *(u32*)VertexManager::s_pCurBufferPointer = val; - VertexManager::s_pCurBufferPointer += 4; + DataWrite(val); colIndex++; } diff --git a/Source/Core/VideoCommon/Src/VertexLoader_Normal.cpp b/Source/Core/VideoCommon/Src/VertexLoader_Normal.cpp index 4102fdff69..0b7a37926e 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader_Normal.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader_Normal.cpp @@ -59,16 +59,13 @@ template inline void ReadIndirect(const T* data) { static_assert(3 == N || 9 == N, "N is only sane as 3 or 9!"); - - auto const dest = reinterpret_cast(VertexManager::s_pCurBufferPointer); for (int i = 0; i != N; ++i) { - dest[i] = FracAdjust(Common::FromBigEndian(data[i])); - LOG_NORM(); + DataWrite(FracAdjust(Common::FromBigEndian(data[i]))); } - VertexManager::s_pCurBufferPointer += sizeof(float) * N; + LOG_NORM(); } template diff --git a/Source/Core/VideoCommon/Src/VertexLoader_Position.cpp b/Source/Core/VideoCommon/Src/VertexLoader_Position.cpp index 14f2819260..ce7a38b8c3 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader_Position.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader_Position.cpp @@ -88,15 +88,10 @@ void LOADERDECL Pos_ReadDirect() { static_assert(N <= 3, "N > 3 is not sane!"); - auto const dest = reinterpret_cast(VertexManager::s_pCurBufferPointer); - for (int i = 0; i != N; ++i) - dest[i] = PosScale(DataRead()); - - 
for (int i = N; i != 3; ++i) - dest[i] = 0.f; + for (int i = 0; i < 3; ++i) + DataWrite(i()) : 0.f); LOG_VTX(); - VertexManager::s_pCurBufferPointer += sizeof(float) * 3; } template @@ -109,16 +104,11 @@ void LOADERDECL Pos_ReadIndex() if (index < std::numeric_limits::max()) { auto const data = reinterpret_cast(cached_arraybases[ARRAY_POSITION] + (index * arraystrides[ARRAY_POSITION])); - auto const dest = reinterpret_cast(VertexManager::s_pCurBufferPointer); - for (int i = 0; i != N; ++i) - dest[i] = PosScale(Common::FromBigEndian(data[i])); - - for (int i = N; i != 3; ++i) - dest[i] = 0.f + for (int i = 0; i < 3; ++i) + DataWrite(i __forceinline void LOG_TEX<1>() { // warning: mapping buffer should be disabled to use this - // PRIM_LOG("tex: %f, ", ((float*)VertexManager::s_pCurBufferPointer)[0]); + // PRIM_LOG("tex: %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-1]); } template <> __forceinline void LOG_TEX<2>() { // warning: mapping buffer should be disabled to use this - // PRIM_LOG("tex: %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[0], ((float*)VertexManager::s_pCurBufferPointer)[1]); + // PRIM_LOG("tex: %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-2], ((float*)VertexManager::s_pCurBufferPointer)[-1]); } extern int tcIndex; @@ -66,14 +66,11 @@ float TCScale(float val) template void LOADERDECL TexCoord_ReadDirect() { - auto const dest = reinterpret_cast(VertexManager::s_pCurBufferPointer); - for (int i = 0; i != N; ++i) - dest[i] = TCScale(DataRead()); + DataWrite(TCScale(DataRead())); LOG_TEX(); - VertexManager::s_pCurBufferPointer += sizeof(float) * N; ++tcIndex; } @@ -86,14 +83,10 @@ void LOADERDECL TexCoord_ReadIndex() auto const data = reinterpret_cast(cached_arraybases[ARRAY_TEXCOORD0 + tcIndex] + (index * arraystrides[ARRAY_TEXCOORD0 + tcIndex])); - auto const dest = reinterpret_cast(VertexManager::s_pCurBufferPointer); - for (int i = 0; i != N; ++i) - dest[i] = TCScale(Common::FromBigEndian(data[i])); + 
DataWrite(TCScale(Common::FromBigEndian(data[i]))); LOG_TEX(); - - VertexManager::s_pCurBufferPointer += sizeof(float) * N; ++tcIndex; } @@ -112,8 +105,8 @@ void LOADERDECL TexCoord_ReadIndex16_Short2_SSE4() const __m128 e = _mm_load1_ps(&tcScale[tcIndex]); const __m128 f = _mm_mul_ps(d, e); _mm_storeu_ps((float*)VertexManager::s_pCurBufferPointer, f); - LOG_TEX<2>(); VertexManager::s_pCurBufferPointer += 8; + LOG_TEX<2>(); tcIndex++; } #endif @@ -128,8 +121,8 @@ void LOADERDECL TexCoord_ReadIndex16_Float2_SSSE3() GC_ALIGNED128(const __m128i a = _mm_loadl_epi64((__m128i*)pData)); GC_ALIGNED128(const __m128i b = _mm_shuffle_epi8(a, kMaskSwap32)); _mm_storel_epi64((__m128i*)VertexManager::s_pCurBufferPointer, b); - LOG_TEX<2>(); VertexManager::s_pCurBufferPointer += 8; + LOG_TEX<2>(); tcIndex++; } #endif diff --git a/Source/Core/VideoCommon/Src/VideoCommon.h b/Source/Core/VideoCommon/Src/VideoCommon.h index 7681d8ba82..b15d539611 100644 --- a/Source/Core/VideoCommon/Src/VideoCommon.h +++ b/Source/Core/VideoCommon/Src/VideoCommon.h @@ -91,7 +91,7 @@ struct TargetRectangle : public MathUtil::Rectangle #endif // warning: mapping buffer should be disabled to use this -// #define LOG_VTX() DEBUG_LOG(VIDEO, "vtx: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[0], ((float*)VertexManager::s_pCurBufferPointer)[1], ((float*)VertexManager::s_pCurBufferPointer)[2]); +// #define LOG_VTX() DEBUG_LOG(VIDEO, "vtx: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-3], ((float*)VertexManager::s_pCurBufferPointer)[-2], ((float*)VertexManager::s_pCurBufferPointer)[-1]); #define LOG_VTX() From 7987d6babbc584956228272e65badf0bb179a2ea Mon Sep 17 00:00:00 2001 From: Jordan Woyak Date: Thu, 21 Feb 2013 12:42:09 -0600 Subject: [PATCH 15/26] Use templates for 8+16 bit indexed versions of SSSE3/SSE4 TextCoord reading. 
--- .../Src/VertexLoader_TextCoord.cpp | 34 ++++++++++++------- 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/Source/Core/VideoCommon/Src/VertexLoader_TextCoord.cpp b/Source/Core/VideoCommon/Src/VertexLoader_TextCoord.cpp index d7a615333c..4be24640b3 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader_TextCoord.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader_TextCoord.cpp @@ -93,11 +93,14 @@ void LOADERDECL TexCoord_ReadIndex() #if _M_SSE >= 0x401 static const __m128i kMaskSwap16_2 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFFFFL, 0x02030001L); -void LOADERDECL TexCoord_ReadIndex16_Short2_SSE4() +template +void LOADERDECL TexCoord_ReadIndex_Short2_SSE4() { + static_assert(!std::numeric_limits::is_signed, "Only unsigned I is sane!"); + // Heavy in ZWW - u16 Index = DataReadU16(); - const s32 *pData = (const s32*)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex])); + auto const index = DataRead(); + const s32 *pData = (const s32*)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (index * arraystrides[ARRAY_TEXCOORD0+tcIndex])); const __m128i a = _mm_cvtsi32_si128(*pData); const __m128i b = _mm_shuffle_epi8(a, kMaskSwap16_2); const __m128i c = _mm_cvtepi16_epi32(b); @@ -105,7 +108,7 @@ void LOADERDECL TexCoord_ReadIndex16_Short2_SSE4() const __m128 e = _mm_load1_ps(&tcScale[tcIndex]); const __m128 f = _mm_mul_ps(d, e); _mm_storeu_ps((float*)VertexManager::s_pCurBufferPointer, f); - VertexManager::s_pCurBufferPointer += 8; + VertexManager::s_pCurBufferPointer += sizeof(float) * 2; LOG_TEX<2>(); tcIndex++; } @@ -114,14 +117,17 @@ void LOADERDECL TexCoord_ReadIndex16_Short2_SSE4() #if _M_SSE >= 0x301 static const __m128i kMaskSwap32 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x04050607L, 0x00010203L); -void LOADERDECL TexCoord_ReadIndex16_Float2_SSSE3() +template +void LOADERDECL TexCoord_ReadIndex_Float2_SSSE3() { - u16 Index = DataReadU16(); - const u32 *pData = (const u32 
*)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex])); + static_assert(!std::numeric_limits::is_signed, "Only unsigned I is sane!"); + + auto const index = DataRead(); + const u32 *pData = (const u32 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (index * arraystrides[ARRAY_TEXCOORD0+tcIndex])); GC_ALIGNED128(const __m128i a = _mm_loadl_epi64((__m128i*)pData)); GC_ALIGNED128(const __m128i b = _mm_shuffle_epi8(a, kMaskSwap32)); _mm_storel_epi64((__m128i*)VertexManager::s_pCurBufferPointer, b); - VertexManager::s_pCurBufferPointer += 8; + VertexManager::s_pCurBufferPointer += sizeof(float) * 2; LOG_TEX<2>(); tcIndex++; } @@ -177,16 +183,20 @@ void VertexLoader_TextCoord::Init(void) { #if _M_SSE >= 0x301 - if (cpu_info.bSSSE3) { - tableReadTexCoord[3][4][1] = TexCoord_ReadIndex16_Float2_SSSE3; + if (cpu_info.bSSSE3) + { + tableReadTexCoord[2][4][1] = TexCoord_ReadIndex_Float2_SSSE3; + tableReadTexCoord[3][4][1] = TexCoord_ReadIndex_Float2_SSSE3; } #endif #if _M_SSE >= 0x401 - if (cpu_info.bSSE4_1) { - tableReadTexCoord[3][3][1] = TexCoord_ReadIndex16_Short2_SSE4; + if (cpu_info.bSSE4_1) + { + tableReadTexCoord[2][3][1] = TexCoord_ReadIndex_Short2_SSE4; + tableReadTexCoord[3][3][1] = TexCoord_ReadIndex_Short2_SSE4; } #endif From a92c93f8702802cf152d41b680a164ede35d1016 Mon Sep 17 00:00:00 2001 From: Jordan Woyak Date: Thu, 21 Feb 2013 13:25:35 -0600 Subject: [PATCH 16/26] Automate some calculations, with templates! 
--- .../VideoCommon/Src/VertexLoader_Normal.cpp | 182 ++++++++++-------- .../VideoCommon/Src/VertexLoader_Normal.h | 13 +- 2 files changed, 109 insertions(+), 86 deletions(-) diff --git a/Source/Core/VideoCommon/Src/VertexLoader_Normal.cpp b/Source/Core/VideoCommon/Src/VertexLoader_Normal.cpp index 0b7a37926e..7e016828be 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader_Normal.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader_Normal.cpp @@ -40,7 +40,7 @@ namespace { template -float FracAdjust(T val) +__forceinline float FracAdjust(T val) { //auto const S8FRAC = 1.f / (1u << 6); //auto const U8FRAC = 1.f / (1u << 7); @@ -52,11 +52,11 @@ float FracAdjust(T val) } template <> -float FracAdjust(float val) +__forceinline float FracAdjust(float val) { return val; } template -inline void ReadIndirect(const T* data) +__forceinline void ReadIndirect(const T* data) { static_assert(3 == N || 9 == N, "N is only sane as 3 or 9!"); @@ -69,15 +69,20 @@ inline void ReadIndirect(const T* data) } template -void LOADERDECL Normal_Direct() +struct Normal_Direct { - auto const source = reinterpret_cast(DataGetPosition()); - ReadIndirect(source); - DataSkip(); -} + static void LOADERDECL function() + { + auto const source = reinterpret_cast(DataGetPosition()); + ReadIndirect(source); + DataSkip(); + } + + static const int size = sizeof(T) * N * 3; +}; template -inline void Normal_Index_Offset() +__forceinline void Normal_Index_Offset() { static_assert(!std::numeric_limits::is_signed, "Only unsigned I is sane!"); @@ -88,88 +93,101 @@ inline void Normal_Index_Offset() } template -void LOADERDECL Normal_Index() +struct Normal_Index { - Normal_Index_Offset(); -} + static void LOADERDECL function() + { + Normal_Index_Offset(); + } + + static const int size = sizeof(I); +}; template -void LOADERDECL Normal_Index_Indices3() +struct Normal_Index_Indices3 { - Normal_Index_Offset(); - Normal_Index_Offset(); - Normal_Index_Offset(); -} + static void LOADERDECL function() + { + 
Normal_Index_Offset(); + Normal_Index_Offset(); + Normal_Index_Offset(); + } + + static const int size = sizeof(I) * 3; +}; } void VertexLoader_Normal::Init(void) { - m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Set(3, Normal_Direct); - m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Set(3, Normal_Direct); - m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Set(6, Normal_Direct); - m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Set(6, Normal_Direct); - m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Set(12, Normal_Direct); - m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Set(9, Normal_Direct); - m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Set(9, Normal_Direct); - m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Set(18, Normal_Direct); - m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Set(18, Normal_Direct); - m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Set(36, Normal_Direct); - - m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Set(3, Normal_Direct); - m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Set(3, Normal_Direct); - m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Set(6, Normal_Direct); - m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Set(6, Normal_Direct); - m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Set(12, Normal_Direct); - m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Set(9, Normal_Direct); - m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Set(9, Normal_Direct); - m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Set(18, Normal_Direct); - m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Set(18, Normal_Direct); - m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Set(36, Normal_Direct); - - m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Set(1, Normal_Index); - 
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Set(1, Normal_Index); - m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Set(1, Normal_Index); - m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Set(1, Normal_Index); - m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Set(1, Normal_Index); - m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Set(1, Normal_Index); - m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Set(1, Normal_Index); - m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Set(1, Normal_Index); - m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Set(1, Normal_Index); - m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Set(1, Normal_Index); - - m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Set(1, Normal_Index); - m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Set(1, Normal_Index); - m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Set(1, Normal_Index); - m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Set(1, Normal_Index); - m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Set(1, Normal_Index); - m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Set(3, Normal_Index_Indices3); - m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Set(3, Normal_Index_Indices3); - m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Set(3, Normal_Index_Indices3); - m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Set(3, Normal_Index_Indices3); - m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Set(3, Normal_Index_Indices3); - - m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Set(2, Normal_Index); - m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Set(2, Normal_Index); - m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Set(2, Normal_Index); - m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Set(2, 
Normal_Index); - m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Set(2, Normal_Index); - m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Set(2, Normal_Index); - m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Set(2, Normal_Index); - m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Set(2, Normal_Index); - m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Set(2, Normal_Index); - m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Set(2, Normal_Index); - - m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Set(2, Normal_Index); - m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Set(2, Normal_Index); - m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Set(2, Normal_Index); - m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Set(2, Normal_Index); - m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Set(2, Normal_Index); - m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Set(6, Normal_Index_Indices3); - m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Set(6, Normal_Index_Indices3); - m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Set(6, Normal_Index_Indices3); - m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Set(6, Normal_Index_Indices3); - m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Set(6, Normal_Index_Indices3); + m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Normal_Direct(); + m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Normal_Direct(); + m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Normal_Direct(); + m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Normal_Direct(); + m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Normal_Direct(); + m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Normal_Direct(); + m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Normal_Direct(); + 
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Normal_Direct(); + m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Normal_Direct(); + m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Normal_Direct(); + + // Same as above + m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Normal_Direct(); + m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Normal_Direct(); + m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Normal_Direct(); + m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Normal_Direct(); + m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Normal_Direct(); + m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Normal_Direct(); + m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Normal_Direct(); + m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Direct(); + m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Direct(); + m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Direct(); + + m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Normal_Index(); + m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Normal_Index(); + m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Normal_Index(); + m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Normal_Index(); + m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Normal_Index(); + m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Normal_Index(); + m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Normal_Index(); + m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Normal_Index(); + m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Normal_Index(); + m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Normal_Index(); + + // Same as above for NRM_NBT + m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Normal_Index(); + m_Table[NRM_INDEX8] 
[NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Normal_Index(); + m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Normal_Index(); + m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Normal_Index(); + m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Normal_Index(); + m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Normal_Index_Indices3(); + m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Normal_Index_Indices3(); + m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Index_Indices3(); + m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Index_Indices3(); + m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Index_Indices3(); + + m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Normal_Index(); + m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Normal_Index(); + m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Normal_Index(); + m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Normal_Index(); + m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Normal_Index(); + m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Normal_Index(); + m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Normal_Index(); + m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Normal_Index(); + m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Normal_Index(); + m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Normal_Index(); + + // Same as above for NRM_NBT + m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Normal_Index(); + m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Normal_Index(); + m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Normal_Index(); + m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Normal_Index(); + m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Normal_Index(); + 
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Normal_Index_Indices3(); + m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Normal_Index_Indices3(); + m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Index_Indices3(); + m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Index_Indices3(); + m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Index_Indices3(); } unsigned int VertexLoader_Normal::GetSize(unsigned int _type, diff --git a/Source/Core/VideoCommon/Src/VertexLoader_Normal.h b/Source/Core/VideoCommon/Src/VertexLoader_Normal.h index 71d4387ff0..d538b2a72e 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader_Normal.h +++ b/Source/Core/VideoCommon/Src/VertexLoader_Normal.h @@ -70,12 +70,17 @@ private: NUM_NRM_INDICES }; - struct Set { - Set() {} - Set(int gc_size_, TPipelineFunction function_) : gc_size(gc_size_), function(function_) {} + struct Set + { + template + void operator=(const T&) + { + gc_size = T::size; + function = T::function; + } + int gc_size; TPipelineFunction function; -// int pc_size; }; static Set m_Table[NUM_NRM_TYPE][NUM_NRM_INDICES][NUM_NRM_ELEMENTS][NUM_NRM_FORMAT]; From 677c6229ad4f0313a2346eb5ed4ae777ce02882d Mon Sep 17 00:00:00 2001 From: Jordan Woyak Date: Thu, 21 Feb 2013 14:41:06 -0600 Subject: [PATCH 17/26] Experimental VertexLoader cleanup! 
--- Source/Core/VideoCommon/Src/VertexLoader.cpp | 87 ++----------------- .../VideoCommon/Src/VertexManagerBase.cpp | 27 +----- 2 files changed, 10 insertions(+), 104 deletions(-) diff --git a/Source/Core/VideoCommon/Src/VertexLoader.cpp b/Source/Core/VideoCommon/Src/VertexLoader.cpp index 23064dec9a..408c864406 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader.cpp @@ -562,86 +562,17 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count) for (int i = 0; i < 2; i++) colElements[i] = m_VtxAttr.color[i].Elements; - // if strips or fans, make sure all vertices can fit in buffer, otherwise flush - int granularity = 1; - switch (primitive) { - case 3: // strip .. hm, weird - case 4: // fan - if (VertexManager::GetRemainingSize() < 3 * native_stride) - VertexManager::Flush(); - break; - case 6: // line strip - if (VertexManager::GetRemainingSize() < 2 * native_stride) - VertexManager::Flush(); - break; - case 0: granularity = 4; break; // quads - case 2: granularity = 3; break; // tris - case 5: granularity = 2; break; // lines - } - - int startv = 0, extraverts = 0; - int v = 0; - - //int remainingVerts2 = VertexManager::GetRemainingVertices(primitive); - while (v < count) + if (VertexManager::GetRemainingSize() < count * native_stride) { - int remainingVerts = VertexManager::GetRemainingSize() / native_stride; - //if (remainingVerts2 - v + startv < remainingVerts) - //remainingVerts = remainingVerts2 - v + startv; - if (remainingVerts < granularity) { - INCSTAT(stats.thisFrame.numBufferSplits); - // This buffer full - break current primitive and flush, to switch to the next buffer. - if (v - startv > 0) - VertexManager::AddVertices(primitive, v - startv + extraverts); - VertexManager::Flush(); - //remainingVerts2 = VertexManager::GetRemainingVertices(primitive); - // Why does this need to be so complicated? 
- switch (primitive) { - case 3: // triangle strip, copy last two vertices - // a little trick since we have to keep track of signs - if (v & 1) { - g_pVideoData -= m_VertexSize*2; - ConvertVertices(1); - g_pVideoData -= m_VertexSize; - ConvertVertices(2); - extraverts = 3; - } - else { - g_pVideoData -= m_VertexSize*2; - ConvertVertices(2); - extraverts = 2; - } - break; - case 4: // tri fan, copy first and last vert - g_pVideoData -= m_VertexSize*(v-startv+extraverts); - ConvertVertices(1); - g_pVideoData += m_VertexSize*(v-startv+extraverts-2); - ConvertVertices(1); - extraverts = 2; - break; - case 6: // line strip - g_pVideoData -= m_VertexSize*1; - ConvertVertices(1); - extraverts = 1; - break; - default: - extraverts = 0; - break; - } - startv = v; - } - int remainingPrims = remainingVerts / granularity; - remainingVerts = remainingPrims * granularity; - if (count - v < remainingVerts) - remainingVerts = count - v; - - ConvertVertices(remainingVerts); - - v += remainingVerts; + VertexManager::Flush(); + + if (VertexManager::GetRemainingSize() < count * native_stride) + ERROR_LOG(VIDEO, "VertexManager: Buffer not large enough for all vertices! 
" + "Increase MAXVBUFFERSIZE or we need primitive breaking afterall."); } - - if (startv < count) - VertexManager::AddVertices(primitive, count - startv + extraverts); + + VertexManager::AddVertices(primitive, count); + ConvertVertices(count); } diff --git a/Source/Core/VideoCommon/Src/VertexManagerBase.cpp b/Source/Core/VideoCommon/Src/VertexManagerBase.cpp index 6fd1898db6..e36b03d1b1 100644 --- a/Source/Core/VideoCommon/Src/VertexManagerBase.cpp +++ b/Source/Core/VideoCommon/Src/VertexManagerBase.cpp @@ -92,6 +92,7 @@ int VertexManager::GetRemainingSize() return (int)(s_pEndBufferPointer - s_pCurBufferPointer); } +// Not used anywhere int VertexManager::GetRemainingVertices(int primitive) { switch (primitive) @@ -123,32 +124,6 @@ void VertexManager::AddVertices(int primitive, int numVertices) if (numVertices <= 0) return; - switch (primitive) - { - case GX_DRAW_QUADS: - case GX_DRAW_TRIANGLES: - case GX_DRAW_TRIANGLE_STRIP: - case GX_DRAW_TRIANGLE_FAN: - if (MAXIBUFFERSIZE - IndexGenerator::GetTriangleindexLen() < 3 * numVertices) - Flush(); - break; - - case GX_DRAW_LINES: - case GX_DRAW_LINE_STRIP: - if (MAXIBUFFERSIZE - IndexGenerator::GetLineindexLen() < 2 * numVertices) - Flush(); - break; - - case GX_DRAW_POINTS: - if (MAXIBUFFERSIZE - IndexGenerator::GetPointindexLen() < numVertices) - Flush(); - break; - - default: - return; - break; - } - if (Flushed) { IndexGenerator::Start(TIBuffer, LIBuffer, PIBuffer); From 368c78dd5c167208454c8c9ac8f458130f23740b Mon Sep 17 00:00:00 2001 From: Jordan Woyak Date: Thu, 21 Feb 2013 19:10:00 -0600 Subject: [PATCH 18/26] IndexGenerator cleanup. 
--- .../Core/VideoCommon/Src/IndexGenerator.cpp | 376 +++++++----------- Source/Core/VideoCommon/Src/IndexGenerator.h | 71 ++-- .../VideoCommon/Src/VertexManagerBase.cpp | 32 +- .../Core/VideoCommon/Src/VertexManagerBase.h | 3 +- 4 files changed, 193 insertions(+), 289 deletions(-) diff --git a/Source/Core/VideoCommon/Src/IndexGenerator.cpp b/Source/Core/VideoCommon/Src/IndexGenerator.cpp index 8053114fae..edcf93d6bd 100644 --- a/Source/Core/VideoCommon/Src/IndexGenerator.cpp +++ b/Source/Core/VideoCommon/Src/IndexGenerator.cpp @@ -15,6 +15,8 @@ // Official SVN repository and contact information can be found at // http://code.google.com/p/dolphin-emu/ +#include + #include "IndexGenerator.h" /* @@ -33,18 +35,12 @@ u16 *IndexGenerator::Lptr = 0; u16 *IndexGenerator::BASELptr = 0; u16 *IndexGenerator::Pptr = 0; u16 *IndexGenerator::BASEPptr = 0; -int IndexGenerator::numT = 0; -int IndexGenerator::numL = 0; -int IndexGenerator::numP = 0; -int IndexGenerator::index = 0; -int IndexGenerator::Tadds = 0; -int IndexGenerator::Ladds = 0; -int IndexGenerator::Padds = 0; -IndexGenerator::IndexPrimitiveType IndexGenerator::LastTPrimitive = Prim_None; -IndexGenerator::IndexPrimitiveType IndexGenerator::LastLPrimitive = Prim_None; -bool IndexGenerator::used = false; +u32 IndexGenerator::numT = 0; +u32 IndexGenerator::numL = 0; +u32 IndexGenerator::numP = 0; +u32 IndexGenerator::index = 0; -void IndexGenerator::Start(u16 *Triangleptr,u16 *Lineptr,u16 *Pointptr) +void IndexGenerator::Start(u16* Triangleptr, u16* Lineptr, u16* Pointptr) { Tptr = Triangleptr; Lptr = Lineptr; @@ -56,288 +52,220 @@ void IndexGenerator::Start(u16 *Triangleptr,u16 *Lineptr,u16 *Pointptr) numT = 0; numL = 0; numP = 0; - Tadds = 0; - Ladds = 0; - Padds = 0; - LastTPrimitive = Prim_None; - LastLPrimitive = Prim_None; } -// Triangles -void IndexGenerator::AddList(int numVerts) + +void IndexGenerator::AddIndices(int primitive, u32 numVerts) { - //if we have no vertices return - if(numVerts <= 0) return; - int 
numTris = numVerts / 3; + //switch (primitive) + //{ + //case GX_DRAW_QUADS: IndexGenerator::AddQuads(numVertices); break; + //case GX_DRAW_TRIANGLES: IndexGenerator::AddList(numVertices); break; + //case GX_DRAW_TRIANGLE_STRIP: IndexGenerator::AddStrip(numVertices); break; + //case GX_DRAW_TRIANGLE_FAN: IndexGenerator::AddFan(numVertices); break; + //case GX_DRAW_LINES: IndexGenerator::AddLineList(numVertices); break; + //case GX_DRAW_LINE_STRIP: IndexGenerator::AddLineStrip(numVertices); break; + //case GX_DRAW_POINTS: IndexGenerator::AddPoints(numVertices); break; + //} + + static void (*const primitive_table[])(u32) = + { + IndexGenerator::AddQuads, + NULL, + IndexGenerator::AddList, + IndexGenerator::AddStrip, + IndexGenerator::AddFan, + IndexGenerator::AddLineList, + IndexGenerator::AddLineStrip, + IndexGenerator::AddPoints, + }; + + primitive_table[primitive](numVerts); + index += numVerts; +} + +// Triangles +void IndexGenerator::WriteTriangle(u32 index1, u32 index2, u32 index3) +{ + *Tptr++ = index1; + *Tptr++ = index2; + *Tptr++ = index3; + + ++numT; +} + +void IndexGenerator::AddList(u32 const numVerts) +{ + auto const numTris = numVerts / 3; if (!numTris) { - //if we have less than 3 verts - if(numVerts == 1) + if (2 == numVerts) { - // discard - index++; - return; - } - else - { - //we have two verts render a degenerated triangle - numTris = 1; - *Tptr++ = index; - *Tptr++ = index+1; - *Tptr++ = index; + // We have two verts. Render a degenerated triangle. 
+ WriteTriangle(index, index + 1, index); } } else { - for (int i = 0; i < numTris; i++) + for (u32 i = 0; i != numTris; ++i) { - *Tptr++ = index+i*3; - *Tptr++ = index+i*3+1; - *Tptr++ = index+i*3+2; + WriteTriangle(index + i * 3, index + i * 3 + 1, index + i * 3 + 2); } - int baseRemainingverts = numVerts - numVerts % 3; + + auto const base_remaining_verts = numTris * 3; switch (numVerts % 3) { case 2: - //whe have 2 remaining verts use strip method - *Tptr++ = index + baseRemainingverts - 1; - *Tptr++ = index + baseRemainingverts; - *Tptr++ = index + baseRemainingverts + 1; - numTris++; + // We have 2 remaining verts. Use strip method + WriteTriangle( + index + base_remaining_verts - 1, + index + base_remaining_verts, + index + base_remaining_verts + 1); + break; + case 1: - //whe have 1 remaining verts use strip method this is only a conjeture - *Tptr++ = index + baseRemainingverts - 2; - *Tptr++ = index + baseRemainingverts - 1; - *Tptr++ = index + baseRemainingverts; - numTris++; + // We have 1 remaining vert. Use strip method this is only a conjeture + WriteTriangle( + index + base_remaining_verts - 2, + index + base_remaining_verts - 1, + index + base_remaining_verts); break; + default: break; }; } - index += numVerts; - numT += numTris; - Tadds++; - LastTPrimitive = Prim_List; } -void IndexGenerator::AddStrip(int numVerts) +void IndexGenerator::AddStrip(u32 const numVerts) { - if(numVerts <= 0) return; - int numTris = numVerts - 2; - if (numTris < 1) + if (numVerts < 3) { - //if we have less than 3 verts - if(numVerts == 1) + if (2 == numVerts) { - // discard - index++; - return; - } - else - { - //we have two verts render a degenerated triangle - numTris = 1; - *Tptr++ = index; - *Tptr++ = index+1; - *Tptr++ = index; + // We have two verts. Render a degenerated triangle. 
+ WriteTriangle(index, index + 1, index); } } else - { + { bool wind = false; - for (int i = 0; i < numTris; i++) + for (u32 i = 2; i < numVerts; ++i) { - *Tptr++ = index+i; - *Tptr++ = index+i+(wind?2:1); - *Tptr++ = index+i+(wind?1:2); - wind = !wind; + WriteTriangle( + index + i - 2, + index + i - !wind, + index + i - wind); + + wind ^= true; } } - index += numVerts; - numT += numTris; - Tadds++; - LastTPrimitive = Prim_Strip; -} -void IndexGenerator::AddFan(int numVerts) -{ - if(numVerts <= 0) return; - int numTris = numVerts - 2; - if (numTris < 1) - { - //if we have less than 3 verts - if(numVerts == 1) - { - //Discard - index++; - return; - } - else - { - //we have two verts render a degenerated triangle - numTris = 1; - *Tptr++ = index; - *Tptr++ = index+1; - *Tptr++ = index; - } - } - else - { - for (int i = 0; i < numTris; i++) - { - *Tptr++ = index; - *Tptr++ = index+i+1; - *Tptr++ = index+i+2; - } - } - index += numVerts; - numT += numTris; - Tadds++; - LastTPrimitive = Prim_Fan; } -void IndexGenerator::AddQuads(int numVerts) +void IndexGenerator::AddFan(u32 numVerts) { - if(numVerts <= 0) return; - int numTris = (numVerts/4)*2; - if (numTris == 0) + if (numVerts < 3) { - //if we have less than 3 verts - if(numVerts == 1) + if (2 == numVerts) { - //discard - index++; - return; - } - else - { - if(numVerts == 2) - { - //we have two verts render a degenerated triangle - numTris = 1; - *Tptr++ = index; - *Tptr++ = index + 1; - *Tptr++ = index; - } - else - { - //we have 3 verts render a full triangle - numTris = 1; - *Tptr++ = index; - *Tptr++ = index + 1; - *Tptr++ = index + 2; - } + // We have two verts. Render a degenerated triangle. 
+ WriteTriangle(index, index + 1, index); } } else { - for (int i = 0; i < numTris / 2; i++) + for (u32 i = 2; i < numVerts; ++i) { - *Tptr++ = index+i*4; - *Tptr++ = index+i*4+1; - *Tptr++ = index+i*4+2; - *Tptr++ = index+i*4; - *Tptr++ = index+i*4+2; - *Tptr++ = index+i*4+3; + WriteTriangle(index, index + i - 1, index + i); } - int baseRemainingverts = numVerts - numVerts % 4; + } +} + +void IndexGenerator::AddQuads(u32 numVerts) +{ + auto const numQuads = numVerts / 4; + if (!numQuads) + { + if (2 == numVerts) + { + // We have two verts. Render a degenerated triangle. + WriteTriangle(index, index + 1, index); + } + else if (3 == numVerts); + { + // We have 3 verts. Render a full triangle. + WriteTriangle(index, index + 1, index + 2); + } + } + else + { + for (u32 i = 0; i != numQuads; ++i) + { + WriteTriangle(index + i * 4, index + i * 4 + 1, index + i * 4 + 2); + WriteTriangle(index + i * 4, index + i * 4 + 2, index + i * 4 + 3); + } + + auto const base_remaining_verts = numQuads * 4; switch (numVerts % 4) { case 3: - //whe have 3 remaining verts use strip method - *Tptr++ = index + baseRemainingverts; - *Tptr++ = index + baseRemainingverts + 1; - *Tptr++ = index + baseRemainingverts + 2; - numTris++; + // We have 3 remaining verts. Use strip method. + WriteTriangle( + index + base_remaining_verts, + index + base_remaining_verts + 1, + index + base_remaining_verts + 2); break; + case 2: - //whe have 2 remaining verts use strip method - *Tptr++ = index + baseRemainingverts - 1; - *Tptr++ = index + baseRemainingverts; - *Tptr++ = index + baseRemainingverts + 1; - numTris++; + // We have 3 remaining verts. Use strip method. 
+ WriteTriangle( + index + base_remaining_verts - 1, + index + base_remaining_verts, + index + base_remaining_verts + 1); break; + case 1: - //whe have 1 remaining verts use strip method this is only a conjeture - *Tptr++ = index + baseRemainingverts - 2; - *Tptr++ = index + baseRemainingverts - 1; - *Tptr++ = index + baseRemainingverts; - numTris++; + // We have 1 remaining verts use strip method. This is only a conjeture. + WriteTriangle( + base_remaining_verts - 2, + index + base_remaining_verts - 1, + index + base_remaining_verts); break; + default: break; }; } - index += numVerts; - numT += numTris; - Tadds++; - LastTPrimitive = Prim_List; } - -//Lines -void IndexGenerator::AddLineList(int numVerts) +// Lines +void IndexGenerator::AddLineList(u32 numVerts) { - if(numVerts <= 0) return; - int numLines = numVerts / 2; - if (!numLines) + auto const numLines = numVerts / 2; + for (u32 i = 0; i != numLines; ++i) { - //Discard - index++; - return; + *Lptr++ = index + i * 2; + *Lptr++ = index + i * 2 + 1; + ++numL; } - else - { - for (int i = 0; i < numLines; i++) - { - *Lptr++ = index+i*2; - *Lptr++ = index+i*2+1; - } - if((numVerts & 1) != 0) - { - //use line strip for remaining vert - *Lptr++ = index + numLines * 2 - 1; - *Lptr++ = index + numLines * 2; - } - } - index += numVerts; - numL += numLines; - Ladds++; - LastLPrimitive = Prim_List; } -void IndexGenerator::AddLineStrip(int numVerts) +void IndexGenerator::AddLineStrip(u32 numVerts) { - int numLines = numVerts - 1; - if (numLines <= 0) + for (u32 i = 1; i < numVerts; ++i) { - if(numVerts == 1) - { - index++; - } - return; + *Lptr++ = index + i - 1; + *Lptr++ = index + i; + ++numL; } - for (int i = 0; i < numLines; i++) - { - *Lptr++ = index+i; - *Lptr++ = index+i+1; - } - index += numVerts; - numL += numLines; - Ladds++; - LastLPrimitive = Prim_Strip; } - - -//Points -void IndexGenerator::AddPoints(int numVerts) +// Points +void IndexGenerator::AddPoints(u32 numVerts) { - for (int i = 0; i < numVerts; i++) 
+ for (u32 i = 0; i != numVerts; ++i) { - *Pptr++ = index+i; + *Pptr++ = index + i; + ++numP; } - index += numVerts; - numP += numVerts; - Padds++; } diff --git a/Source/Core/VideoCommon/Src/IndexGenerator.h b/Source/Core/VideoCommon/Src/IndexGenerator.h index d1ed143d98..5c6b7a5273 100644 --- a/Source/Core/VideoCommon/Src/IndexGenerator.h +++ b/Source/Core/VideoCommon/Src/IndexGenerator.h @@ -25,53 +25,58 @@ class IndexGenerator { public: - //Init + // Init static void Start(u16 *Triangleptr,u16 *Lineptr,u16 *Pointptr); - //Triangles - static void AddList(int numVerts); - static void AddStrip(int numVerts); - static void AddFan(int numVerts); - static void AddQuads(int numVerts); - //Lines - static void AddLineList(int numVerts); - static void AddLineStrip(int numVerts); - //Points - static void AddPoints(int numVerts); - //Interface - static int GetNumTriangles() {used = true; return numT;} - static int GetNumLines() {used = true;return numL;} - static int GetNumPoints() {used = true;return numP;} - static int GetNumVerts() {return index;} //returns numprimitives - static int GetNumAdds() {return Tadds + Ladds + Padds;} - static int GetTriangleindexLen() {return (int)(Tptr - BASETptr);} - static int GetLineindexLen() {return (int)(Lptr - BASELptr);} - static int GetPointindexLen() {return (int)(Pptr - BASEPptr);} - + + static void AddIndices(int primitive, u32 numVertices); + + // Interface + static u32 GetNumTriangles() {return numT;} + static u32 GetNumLines() {return numL;} + static u32 GetNumPoints() {return numP;} + + // returns numprimitives + static u32 GetNumVerts() {return index;} + + static u32 GetTriangleindexLen() {return (u32)(Tptr - BASETptr);} + static u32 GetLineindexLen() {return (u32)(Lptr - BASELptr);} + static u32 GetPointindexLen() {return (u32)(Pptr - BASEPptr);} +/* enum IndexPrimitiveType { Prim_None = 0, Prim_List, Prim_Strip, Prim_Fan - } ; + }; +*/ private: + // Triangles + static void AddList(u32 numVerts); + static void AddStrip(u32 
numVerts); + static void AddFan(u32 numVerts); + static void AddQuads(u32 numVerts); + + static void WriteTriangle(u32 index1, u32 index2, u32 index3); + + // Lines + static void AddLineList(u32 numVerts); + static void AddLineStrip(u32 numVerts); + + // Points + static void AddPoints(u32 numVerts); + static u16 *Tptr; static u16 *BASETptr; static u16 *Lptr; static u16 *BASELptr; static u16 *Pptr; static u16 *BASEPptr; - static int numT; - static int numL; - static int numP; - static int index; - static int Tadds; - static int Ladds; - static int Padds; - static IndexPrimitiveType LastTPrimitive; - static IndexPrimitiveType LastLPrimitive; - static bool used; - + // TODO: redundant variables + static u32 numT; + static u32 numL; + static u32 numP; + static u32 index; }; #endif // _INDEXGENERATOR_H diff --git a/Source/Core/VideoCommon/Src/VertexManagerBase.cpp b/Source/Core/VideoCommon/Src/VertexManagerBase.cpp index e36b03d1b1..af4d646cc3 100644 --- a/Source/Core/VideoCommon/Src/VertexManagerBase.cpp +++ b/Source/Core/VideoCommon/Src/VertexManagerBase.cpp @@ -59,34 +59,6 @@ VertexManager::~VertexManager() ResetBuffer(); } -void VertexManager::AddIndices(int primitive, int numVertices) -{ - //switch (primitive) - //{ - //case GX_DRAW_QUADS: IndexGenerator::AddQuads(numVertices); break; - //case GX_DRAW_TRIANGLES: IndexGenerator::AddList(numVertices); break; - //case GX_DRAW_TRIANGLE_STRIP: IndexGenerator::AddStrip(numVertices); break; - //case GX_DRAW_TRIANGLE_FAN: IndexGenerator::AddFan(numVertices); break; - //case GX_DRAW_LINES: IndexGenerator::AddLineList(numVertices); break; - //case GX_DRAW_LINE_STRIP: IndexGenerator::AddLineStrip(numVertices); break; - //case GX_DRAW_POINTS: IndexGenerator::AddPoints(numVertices); break; - //} - - static void (*const primitive_table[])(int) = - { - IndexGenerator::AddQuads, - NULL, - IndexGenerator::AddList, - IndexGenerator::AddStrip, - IndexGenerator::AddFan, - IndexGenerator::AddLineList, - IndexGenerator::AddLineStrip, - 
IndexGenerator::AddPoints, - }; - - primitive_table[primitive](numVertices); -} - int VertexManager::GetRemainingSize() { return (int)(s_pEndBufferPointer - s_pCurBufferPointer); @@ -119,7 +91,7 @@ int VertexManager::GetRemainingVertices(int primitive) } } -void VertexManager::AddVertices(int primitive, int numVertices) +void VertexManager::AddVertices(int primitive, u32 numVertices) { if (numVertices <= 0) return; @@ -132,7 +104,7 @@ void VertexManager::AddVertices(int primitive, int numVertices) ADDSTAT(stats.thisFrame.numPrims, numVertices); INCSTAT(stats.thisFrame.numPrimitiveJoins); - AddIndices(primitive, numVertices); + IndexGenerator::AddIndices(primitive, numVertices); } void VertexManager::Flush() diff --git a/Source/Core/VideoCommon/Src/VertexManagerBase.h b/Source/Core/VideoCommon/Src/VertexManagerBase.h index d4608630ca..5169422b3c 100644 --- a/Source/Core/VideoCommon/Src/VertexManagerBase.h +++ b/Source/Core/VideoCommon/Src/VertexManagerBase.h @@ -27,7 +27,7 @@ public: VertexManager(); virtual ~VertexManager(); // needs to be virtual for DX11's dtor - static void AddVertices(int _primitive, int _numVertices); + static void AddVertices(int _primitive, u32 _numVertices); // TODO: protected? static u8 *s_pBaseBufferPointer; @@ -63,7 +63,6 @@ protected: void DoStateShared(PointerWrap& p); private: - static void AddIndices(int primitive, int numVertices); //virtual void Draw(u32 stride, bool alphapass) = 0; // temp virtual void vFlush() = 0; From f16dcfe6f69c0cb8ba89ea6611157c8c2eab3dad Mon Sep 17 00:00:00 2001 From: Jordan Woyak Date: Thu, 21 Feb 2013 21:01:53 -0600 Subject: [PATCH 19/26] Remove logic from IndexGenerator which does not seem to match that of Video Software. 
--- .../Core/VideoCommon/Src/IndexGenerator.cpp | 172 ++++-------------- Source/Core/VideoCommon/Src/IndexGenerator.h | 4 +- 2 files changed, 36 insertions(+), 140 deletions(-) diff --git a/Source/Core/VideoCommon/Src/IndexGenerator.cpp b/Source/Core/VideoCommon/Src/IndexGenerator.cpp index edcf93d6bd..a7d073890f 100644 --- a/Source/Core/VideoCommon/Src/IndexGenerator.cpp +++ b/Source/Core/VideoCommon/Src/IndexGenerator.cpp @@ -29,16 +29,16 @@ QUAD simulator */ //Init -u16 *IndexGenerator::Tptr = 0; -u16 *IndexGenerator::BASETptr = 0; -u16 *IndexGenerator::Lptr = 0; -u16 *IndexGenerator::BASELptr = 0; -u16 *IndexGenerator::Pptr = 0; -u16 *IndexGenerator::BASEPptr = 0; -u32 IndexGenerator::numT = 0; -u32 IndexGenerator::numL = 0; -u32 IndexGenerator::numP = 0; -u32 IndexGenerator::index = 0; +u16 *IndexGenerator::Tptr; +u16 *IndexGenerator::BASETptr; +u16 *IndexGenerator::Lptr; +u16 *IndexGenerator::BASELptr; +u16 *IndexGenerator::Pptr; +u16 *IndexGenerator::BASEPptr; +u32 IndexGenerator::numT; +u32 IndexGenerator::numL; +u32 IndexGenerator::numP; +u32 IndexGenerator::index; void IndexGenerator::Start(u16* Triangleptr, u16* Lineptr, u16* Pointptr) { @@ -58,13 +58,13 @@ void IndexGenerator::AddIndices(int primitive, u32 numVerts) { //switch (primitive) //{ - //case GX_DRAW_QUADS: IndexGenerator::AddQuads(numVertices); break; - //case GX_DRAW_TRIANGLES: IndexGenerator::AddList(numVertices); break; - //case GX_DRAW_TRIANGLE_STRIP: IndexGenerator::AddStrip(numVertices); break; - //case GX_DRAW_TRIANGLE_FAN: IndexGenerator::AddFan(numVertices); break; - //case GX_DRAW_LINES: IndexGenerator::AddLineList(numVertices); break; - //case GX_DRAW_LINE_STRIP: IndexGenerator::AddLineStrip(numVertices); break; - //case GX_DRAW_POINTS: IndexGenerator::AddPoints(numVertices); break; + //case GX_DRAW_QUADS: IndexGenerator::AddQuads(numVerts); break; + //case GX_DRAW_TRIANGLES: IndexGenerator::AddList(numVerts); break; + //case GX_DRAW_TRIANGLE_STRIP: 
IndexGenerator::AddStrip(numVerts); break; + //case GX_DRAW_TRIANGLE_FAN: IndexGenerator::AddFan(numVerts); break; + //case GX_DRAW_LINES: IndexGenerator::AddLineList(numVerts); break; + //case GX_DRAW_LINE_STRIP: IndexGenerator::AddLineStrip(numVerts); break; + //case GX_DRAW_POINTS: IndexGenerator::AddPoints(numVerts); break; //} static void (*const primitive_table[])(u32) = @@ -89,152 +89,48 @@ void IndexGenerator::WriteTriangle(u32 index1, u32 index2, u32 index3) *Tptr++ = index1; *Tptr++ = index2; *Tptr++ = index3; - + ++numT; } void IndexGenerator::AddList(u32 const numVerts) -{ +{ auto const numTris = numVerts / 3; - if (!numTris) + for (u32 i = 0; i != numTris; ++i) { - if (2 == numVerts) - { - // We have two verts. Render a degenerated triangle. - WriteTriangle(index, index + 1, index); - } - } - else - { - for (u32 i = 0; i != numTris; ++i) - { - WriteTriangle(index + i * 3, index + i * 3 + 1, index + i * 3 + 2); - } - - auto const base_remaining_verts = numTris * 3; - switch (numVerts % 3) - { - case 2: - // We have 2 remaining verts. Use strip method - WriteTriangle( - index + base_remaining_verts - 1, - index + base_remaining_verts, - index + base_remaining_verts + 1); - - break; - - case 1: - // We have 1 remaining vert. Use strip method this is only a conjeture - WriteTriangle( - index + base_remaining_verts - 2, - index + base_remaining_verts - 1, - index + base_remaining_verts); - break; - - default: - break; - }; + WriteTriangle(index + i * 3, index + i * 3 + 1, index + i * 3 + 2); } } void IndexGenerator::AddStrip(u32 const numVerts) { - if (numVerts < 3) + bool wind = false; + for (u32 i = 2; i < numVerts; ++i) { - if (2 == numVerts) - { - // We have two verts. Render a degenerated triangle. 
- WriteTriangle(index, index + 1, index); - } - } - else - { - bool wind = false; - for (u32 i = 2; i < numVerts; ++i) - { - WriteTriangle( - index + i - 2, - index + i - !wind, - index + i - wind); - - wind ^= true; - } + WriteTriangle( + index + i - 2, + index + i - !wind, + index + i - wind); + + wind ^= true; } } void IndexGenerator::AddFan(u32 numVerts) { - if (numVerts < 3) + for (u32 i = 2; i < numVerts; ++i) { - if (2 == numVerts) - { - // We have two verts. Render a degenerated triangle. - WriteTriangle(index, index + 1, index); - } - } - else - { - for (u32 i = 2; i < numVerts; ++i) - { - WriteTriangle(index, index + i - 1, index + i); - } + WriteTriangle(index, index + i - 1, index + i); } } void IndexGenerator::AddQuads(u32 numVerts) { auto const numQuads = numVerts / 4; - if (!numQuads) + for (u32 i = 0; i != numQuads; ++i) { - if (2 == numVerts) - { - // We have two verts. Render a degenerated triangle. - WriteTriangle(index, index + 1, index); - } - else if (3 == numVerts); - { - // We have 3 verts. Render a full triangle. - WriteTriangle(index, index + 1, index + 2); - } - } - else - { - for (u32 i = 0; i != numQuads; ++i) - { - WriteTriangle(index + i * 4, index + i * 4 + 1, index + i * 4 + 2); - WriteTriangle(index + i * 4, index + i * 4 + 2, index + i * 4 + 3); - } - - auto const base_remaining_verts = numQuads * 4; - switch (numVerts % 4) - { - case 3: - // We have 3 remaining verts. Use strip method. - WriteTriangle( - index + base_remaining_verts, - index + base_remaining_verts + 1, - index + base_remaining_verts + 2); - break; - - case 2: - // We have 3 remaining verts. Use strip method. - WriteTriangle( - index + base_remaining_verts - 1, - index + base_remaining_verts, - index + base_remaining_verts + 1); - break; - - case 1: - // We have 1 remaining verts use strip method. This is only a conjeture. 
- WriteTriangle( - base_remaining_verts - 2, - index + base_remaining_verts - 1, - index + base_remaining_verts); - break; - - default: - break; - }; + WriteTriangle(index + i * 4, index + i * 4 + 1, index + i * 4 + 2); + WriteTriangle(index + i * 4, index + i * 4 + 2, index + i * 4 + 3); } } diff --git a/Source/Core/VideoCommon/Src/IndexGenerator.h b/Source/Core/VideoCommon/Src/IndexGenerator.h index 5c6b7a5273..400d252bf8 100644 --- a/Source/Core/VideoCommon/Src/IndexGenerator.h +++ b/Source/Core/VideoCommon/Src/IndexGenerator.h @@ -57,8 +57,6 @@ private: static void AddFan(u32 numVerts); static void AddQuads(u32 numVerts); - static void WriteTriangle(u32 index1, u32 index2, u32 index3); - // Lines static void AddLineList(u32 numVerts); static void AddLineStrip(u32 numVerts); @@ -66,6 +64,8 @@ private: // Points static void AddPoints(u32 numVerts); + static void WriteTriangle(u32 index1, u32 index2, u32 index3); + static u16 *Tptr; static u16 *BASETptr; static u16 *Lptr; From 21bd5be753efb016ab5e3a75bf5e6f8e71a3576a Mon Sep 17 00:00:00 2001 From: Jordan Woyak Date: Thu, 21 Feb 2013 23:12:53 -0600 Subject: [PATCH 20/26] Buffer size changes. --- .../Core/VideoCommon/Src/VertexManagerBase.h | 35 ++++++++++--------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/Source/Core/VideoCommon/Src/VertexManagerBase.h b/Source/Core/VideoCommon/Src/VertexManagerBase.h index 5169422b3c..d846a65b90 100644 --- a/Source/Core/VideoCommon/Src/VertexManagerBase.h +++ b/Source/Core/VideoCommon/Src/VertexManagerBase.h @@ -7,25 +7,28 @@ class PointerWrap; class VertexManager { +private: + // What are the actual values? 
+ static const u32 SMALLEST_POSSIBLE_VERTEX = 1; + static const u32 LARGEST_POSSIBLE_VERTEX = 188; + + static const u32 MAX_PRIMITIVES_PER_COMMAND = (u16)-1; + public: - - enum - { - // values from OGL backend - //MAXVBUFFERSIZE = 0x1FFFF, - //MAXIBUFFERSIZE = 0xFFFF, - - // values from DX9 backend - //MAXVBUFFERSIZE = 0x50000, - //MAXIBUFFERSIZE = 0xFFFF, - - // values from DX11 backend - MAXVBUFFERSIZE = 0x50000, - MAXIBUFFERSIZE = 0xFFFF, - }; + // values from OGL backend + //static const u32 MAXVBUFFERSIZE = 0x1FFFF; + + // values from DX9/11 backend + static const u32 MAXVBUFFERSIZE = MAX_PRIMITIVES_PER_COMMAND * LARGEST_POSSIBLE_VERTEX; + + // We may convert triangle-fans to triangle-lists, almost 3x as many indices. + // Watching for a full index buffer would probably be smarter than this calculation. + static const u32 MAXIBUFFERSIZE = MAXVBUFFERSIZE * 3 / SMALLEST_POSSIBLE_VERTEX; + //static const u32 MAXIBUFFERSIZE = MAX_PRIMITIVES_PER_COMMAND * 3; VertexManager(); - virtual ~VertexManager(); // needs to be virtual for DX11's dtor + // needs to be virtual for DX11's dtor + virtual ~VertexManager(); static void AddVertices(int _primitive, u32 _numVertices); From db1be085c2d0a359e5ee561bbbad8e8428a72552 Mon Sep 17 00:00:00 2001 From: Jordan Woyak Date: Fri, 22 Feb 2013 01:41:52 -0600 Subject: [PATCH 21/26] VertexManager cleanup --- Source/Core/VideoCommon/Src/VertexLoader.cpp | 3 +- .../VideoCommon/Src/VertexManagerBase.cpp | 74 ++++++++----------- .../Core/VideoCommon/Src/VertexManagerBase.h | 35 ++++----- .../Plugin_VideoDX11/Src/VertexManager.cpp | 12 +-- .../Plugin_VideoDX9/Src/VertexManager.cpp | 6 -- .../Plugin_VideoOGL/Src/VertexManager.cpp | 6 -- 6 files changed, 54 insertions(+), 82 deletions(-) diff --git a/Source/Core/VideoCommon/Src/VertexLoader.cpp b/Source/Core/VideoCommon/Src/VertexLoader.cpp index 408c864406..5e1490e020 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader.cpp @@ -571,8 
+571,9 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count) "Increase MAXVBUFFERSIZE or we need primitive breaking afterall."); } - VertexManager::AddVertices(primitive, count); ConvertVertices(count); + VertexManager::AddVertices(primitive, count); + //VertexManager::Flush(); } diff --git a/Source/Core/VideoCommon/Src/VertexManagerBase.cpp b/Source/Core/VideoCommon/Src/VertexManagerBase.cpp index af4d646cc3..131d6de42a 100644 --- a/Source/Core/VideoCommon/Src/VertexManagerBase.cpp +++ b/Source/Core/VideoCommon/Src/VertexManagerBase.cpp @@ -12,25 +12,17 @@ #include "BPStructs.h" #include "VertexManagerBase.h" +#include "MainBase.h" #include "VideoConfig.h" VertexManager *g_vertex_manager; -u8 *VertexManager::s_pBaseBufferPointer; u8 *VertexManager::s_pCurBufferPointer; +u8 *VertexManager::s_pBaseBufferPointer; u8 *VertexManager::s_pEndBufferPointer; -u8 *VertexManager::LocalVBuffer; -u16 *VertexManager::TIBuffer; -u16 *VertexManager::LIBuffer; -u16 *VertexManager::PIBuffer; - -bool VertexManager::Flushed; - VertexManager::VertexManager() { - Flushed = false; - LocalVBuffer = new u8[MAXVBUFFERSIZE]; s_pCurBufferPointer = s_pBaseBufferPointer = LocalVBuffer; s_pEndBufferPointer = s_pBaseBufferPointer + MAXVBUFFERSIZE; @@ -39,12 +31,7 @@ VertexManager::VertexManager() LIBuffer = new u16[MAXIBUFFERSIZE]; PIBuffer = new u16[MAXIBUFFERSIZE]; - IndexGenerator::Start(TIBuffer, LIBuffer, PIBuffer); -} - -void VertexManager::ResetBuffer() -{ - s_pCurBufferPointer = s_pBaseBufferPointer; + ResetBuffer(); } VertexManager::~VertexManager() @@ -59,12 +46,25 @@ VertexManager::~VertexManager() ResetBuffer(); } +void VertexManager::ResetBuffer() +{ + s_pCurBufferPointer = s_pBaseBufferPointer; + IndexGenerator::Start(TIBuffer, LIBuffer, PIBuffer); +} + int VertexManager::GetRemainingSize() { return (int)(s_pEndBufferPointer - s_pCurBufferPointer); } +bool VertexManager::IsFlushed() const +{ + return s_pBaseBufferPointer == s_pCurBufferPointer; +} + // Not 
used anywhere +// TODO: use this +#if 0 int VertexManager::GetRemainingVertices(int primitive) { switch (primitive) @@ -90,42 +90,38 @@ int VertexManager::GetRemainingVertices(int primitive) break; } } +#endif void VertexManager::AddVertices(int primitive, u32 numVertices) { if (numVertices <= 0) return; - if (Flushed) - { - IndexGenerator::Start(TIBuffer, LIBuffer, PIBuffer); - Flushed = false; - } - ADDSTAT(stats.thisFrame.numPrims, numVertices); INCSTAT(stats.thisFrame.numPrimitiveJoins); + IndexGenerator::AddIndices(primitive, numVertices); } void VertexManager::Flush() { + if (g_vertex_manager->IsFlushed()) + return; + // loading a state will invalidate BP, so check for it g_video_backend->CheckInvalidState(); + VideoFifo_CheckEFBAccess(); + g_vertex_manager->vFlush(); + + g_vertex_manager->ResetBuffer(); } // TODO: need to merge more stuff into VideoCommon to use this #if (0) void VertexManager::Flush() { - if (s_pBaseBufferPointer == s_pCurBufferPointer || Flushed) - return; - - Flushed = true; - - VideoFifo_CheckEFBAccess(); - #if defined(_DEBUG) || defined(DEBUGFAST) PRIM_LOG("frame%d:\n texgen=%d, numchan=%d, dualtex=%d, ztex=%d, cole=%d, alpe=%d, ze=%d", g_ActiveConfig.iSaveTargetId, xfregs.numTexGens, xfregs.nNumChans, (int)xfregs.bEnableDualTexTransform, bpmem.ztex2.op, @@ -198,9 +194,9 @@ void VertexManager::Flush() // finally bind if (false == PixelShaderCache::SetShader(false, g_nativeVertexFmt->m_components)) - goto shader_fail; + return; if (false == VertexShaderCache::SetShader(g_nativeVertexFmt->m_components)) - goto shader_fail; + return; const int stride = g_nativeVertexFmt->GetVertexStride(); //if (g_nativeVertexFmt) @@ -212,7 +208,7 @@ void VertexManager::Flush() if (false == g_ActiveConfig.bDstAlphaPass && bpmem.dstalpha.enable && bpmem.blendmode.alphaupdate) { if (false == PixelShaderCache::SetShader(true, g_nativeVertexFmt->m_components)) - goto shader_fail; + return; g_vertex_manager->Draw(stride, true); } @@ -246,9 +242,6 @@ void 
VertexManager::Flush() } #endif ++g_Config.iSaveTargetId; - -shader_fail: - ResetBuffer(); } #endif @@ -259,12 +252,9 @@ void VertexManager::DoState(PointerWrap& p) void VertexManager::DoStateShared(PointerWrap& p) { - p.DoPointer(s_pCurBufferPointer, LocalVBuffer); + p.DoPointer(s_pCurBufferPointer, g_vertex_manager->LocalVBuffer); p.DoArray(LocalVBuffer, MAXVBUFFERSIZE); - p.DoArray(TIBuffer, MAXIBUFFERSIZE); - p.DoArray(LIBuffer, MAXIBUFFERSIZE); - p.DoArray(PIBuffer, MAXIBUFFERSIZE); - - if (p.GetMode() == PointerWrap::MODE_READ) - Flushed = false; + p.DoArray(g_vertex_manager->TIBuffer, MAXIBUFFERSIZE); + p.DoArray(g_vertex_manager->LIBuffer, MAXIBUFFERSIZE); + p.DoArray(g_vertex_manager->PIBuffer, MAXIBUFFERSIZE); } diff --git a/Source/Core/VideoCommon/Src/VertexManagerBase.h b/Source/Core/VideoCommon/Src/VertexManagerBase.h index d846a65b90..4bf5541cbd 100644 --- a/Source/Core/VideoCommon/Src/VertexManagerBase.h +++ b/Source/Core/VideoCommon/Src/VertexManagerBase.h @@ -32,46 +32,47 @@ public: static void AddVertices(int _primitive, u32 _numVertices); - // TODO: protected? 
- static u8 *s_pBaseBufferPointer; static u8 *s_pCurBufferPointer; + static u8 *s_pBaseBufferPointer; static u8 *s_pEndBufferPointer; static int GetRemainingSize(); - static int GetRemainingVertices(int primitive); + + //int GetRemainingVertices(int primitive); static void Flush(); virtual ::NativeVertexFormat* CreateNativeVertexFormat() = 0; - static u16* GetTriangleIndexBuffer() { return TIBuffer; } - static u16* GetLineIndexBuffer() { return LIBuffer; } - static u16* GetPointIndexBuffer() { return PIBuffer; } - static u8* GetVertexBuffer() { return s_pBaseBufferPointer; } + // TODO: use these instead of TIBuffer, etc + +// u16* GetTriangleIndexBuffer() { return TIBuffer; } +// u16* GetLineIndexBuffer() { return LIBuffer; } +// u16* GetPointIndexBuffer() { return PIBuffer; } +// u8* GetVertexBuffer() { return s_pBaseBufferPointer; } static void DoState(PointerWrap& p); virtual void CreateDeviceObjects(){}; virtual void DestroyDeviceObjects(){}; + protected: - // TODO: make private after Flush() is merged - static void ResetBuffer(); - - static u16 *TIBuffer; - static u16 *LIBuffer; - static u16 *PIBuffer; - - static bool Flushed; + u16* TIBuffer; + u16* LIBuffer; + u16* PIBuffer; virtual void vDoState(PointerWrap& p) { DoStateShared(p); } void DoStateShared(PointerWrap& p); private: + bool IsFlushed() const; + + void ResetBuffer(); + //virtual void Draw(u32 stride, bool alphapass) = 0; // temp virtual void vFlush() = 0; - static u8 *LocalVBuffer; - + u8* LocalVBuffer; }; extern VertexManager *g_vertex_manager; diff --git a/Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp b/Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp index b227a4d27b..573180e0b7 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp +++ b/Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp @@ -211,11 +211,6 @@ void VertexManager::Draw(UINT stride) void VertexManager::vFlush() { - if (s_pBaseBufferPointer == s_pCurBufferPointer) return; - if (Flushed) return; - 
Flushed=true; - VideoFifo_CheckEFBAccess(); - u32 usedtextures = 0; for (u32 i = 0; i < (u32)bpmem.genMode.numtevstages + 1; ++i) if (bpmem.tevorders[i / 2].getEnable(i & 1)) @@ -263,12 +258,12 @@ void VertexManager::vFlush() g_nativeVertexFmt->m_components)) { GFX_DEBUGGER_PAUSE_LOG_AT(NEXT_ERROR,true,{printf("Fail to set pixel shader\n");}); - goto shader_fail; + return; } if (!VertexShaderCache::SetShader(g_nativeVertexFmt->m_components)) { GFX_DEBUGGER_PAUSE_LOG_AT(NEXT_ERROR,true,{printf("Fail to set pixel shader\n");}); - goto shader_fail; + return; } LoadBuffers(); unsigned int stride = g_nativeVertexFmt->GetVertexStride(); @@ -280,9 +275,6 @@ void VertexManager::vFlush() GFX_DEBUGGER_PAUSE_AT(NEXT_FLUSH, true); g_renderer->RestoreState(); - -shader_fail: - ResetBuffer(); } } // namespace diff --git a/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp b/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp index 5ca6018fb1..7ef3052a96 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp @@ -307,11 +307,6 @@ void VertexManager::DrawVA(int stride) void VertexManager::vFlush() { - if (s_pBaseBufferPointer == s_pCurBufferPointer) return; - if (Flushed) return; - Flushed = true; - VideoFifo_CheckEFBAccess(); - u32 usedtextures = 0; for (u32 i = 0; i < (u32)bpmem.genMode.numtevstages + 1; ++i) if (bpmem.tevorders[i / 2].getEnable(i & 1)) @@ -388,7 +383,6 @@ shader_fail: CurrentIBufferIndex += IndexGenerator::GetTriangleindexLen() + IndexGenerator::GetLineindexLen() + IndexGenerator::GetPointindexLen(); CurrentVBufferIndex += IndexGenerator::GetNumVerts() * stride; } - ResetBuffer(); } } diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp b/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp index 5a07f42662..5777f225cd 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp @@ -100,10 +100,6 @@ void 
VertexManager::Draw() void VertexManager::vFlush() { - if (s_pBaseBufferPointer == s_pCurBufferPointer) return; - if (Flushed) return; - Flushed=true; - VideoFifo_CheckEFBAccess(); #if defined(_DEBUG) || defined(DEBUGFAST) PRIM_LOG("frame%d:\n texgen=%d, numchan=%d, dualtex=%d, ztex=%d, cole=%d, alpe=%d, ze=%d", g_ActiveConfig.iSaveTargetId, xfregs.numTexGen.numTexGens, xfregs.numChan.numColorChans, xfregs.dualTexTrans.enabled, bpmem.ztex2.op, @@ -240,8 +236,6 @@ void VertexManager::vFlush() GFX_DEBUGGER_PAUSE_AT(NEXT_FLUSH, true); //s_nCurVBOIndex = (s_nCurVBOIndex + 1) % ARRAYSIZE(s_vboBuffers); - s_pCurBufferPointer = s_pBaseBufferPointer; - IndexGenerator::Start(TIBuffer,LIBuffer,PIBuffer); #if defined(_DEBUG) || defined(DEBUGFAST) if (g_ActiveConfig.iLog & CONF_SAVESHADERS) From d3f1bb93f2454ab1e1daf4c2b75d18269719c9b3 Mon Sep 17 00:00:00 2001 From: Jordan Woyak Date: Fri, 22 Feb 2013 02:19:59 -0600 Subject: [PATCH 22/26] Use DataWrite in a few more places. --- Source/Core/VideoCommon/Src/VertexLoader.cpp | 26 +++++++++----------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/Source/Core/VideoCommon/Src/VertexLoader.cpp b/Source/Core/VideoCommon/Src/VertexLoader.cpp index 5e1490e020..7d1b9d99a5 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader.cpp @@ -97,10 +97,10 @@ void LOADERDECL PosMtx_ReadDirect_UByte() void LOADERDECL PosMtx_Write() { - *VertexManager::s_pCurBufferPointer++ = s_curposmtx; - *VertexManager::s_pCurBufferPointer++ = 0; - *VertexManager::s_pCurBufferPointer++ = 0; - *VertexManager::s_pCurBufferPointer++ = 0; + DataWrite(s_curposmtx); + DataWrite(0); + DataWrite(0); + DataWrite(0); } void LOADERDECL UpdateBoundingBoxPrepare() @@ -166,24 +166,22 @@ void LOADERDECL TexMtx_ReadDirect_UByte() void LOADERDECL TexMtx_Write_Float() { - *(float*)VertexManager::s_pCurBufferPointer = (float)s_curtexmtx[s_texmtxwrite++]; - VertexManager::s_pCurBufferPointer += 4; + 
DataWrite(float(s_curtexmtx[s_texmtxwrite++])); } void LOADERDECL TexMtx_Write_Float2() { - ((float*)VertexManager::s_pCurBufferPointer)[0] = 0; - ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)s_curtexmtx[s_texmtxwrite++]; - VertexManager::s_pCurBufferPointer += 8; + DataWrite(0.f); + DataWrite(float(s_curtexmtx[s_texmtxwrite++])); } void LOADERDECL TexMtx_Write_Float4() { - ((float*)VertexManager::s_pCurBufferPointer)[0] = 0; - ((float*)VertexManager::s_pCurBufferPointer)[1] = 0; - ((float*)VertexManager::s_pCurBufferPointer)[2] = s_curtexmtx[s_texmtxwrite++]; - ((float*)VertexManager::s_pCurBufferPointer)[3] = 0; // Just to fill out with 0. - VertexManager::s_pCurBufferPointer += 16; + DataWrite(0.f); + DataWrite(0.f); + DataWrite(float(s_curtexmtx[s_texmtxwrite++])); + // Just to fill out with 0. + DataWrite(0.f); } VertexLoader::VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr) From 86935ab4fd2d6fe2989a00d9b497fb0a3e08c3b4 Mon Sep 17 00:00:00 2001 From: Jordan Woyak Date: Fri, 22 Feb 2013 03:22:20 -0600 Subject: [PATCH 23/26] forceinline IndexGenerator::WriteTriangle --- Source/Core/VideoCommon/Src/IndexGenerator.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Source/Core/VideoCommon/Src/IndexGenerator.cpp b/Source/Core/VideoCommon/Src/IndexGenerator.cpp index a7d073890f..f2d23a7fce 100644 --- a/Source/Core/VideoCommon/Src/IndexGenerator.cpp +++ b/Source/Core/VideoCommon/Src/IndexGenerator.cpp @@ -17,6 +17,7 @@ #include +#include "Common.h" #include "IndexGenerator.h" /* @@ -84,7 +85,7 @@ void IndexGenerator::AddIndices(int primitive, u32 numVerts) } // Triangles -void IndexGenerator::WriteTriangle(u32 index1, u32 index2, u32 index3) +__forceinline void IndexGenerator::WriteTriangle(u32 index1, u32 index2, u32 index3) { *Tptr++ = index1; *Tptr++ = index2; From 6b80e6f83cf4557eff89402484c1d5b2f5606a5f Mon Sep 17 00:00:00 2001 From: Jordan Woyak Date: Tue, 26 Feb 2013 22:47:50 -0600 Subject: [PATCH 24/26] Tweak 
Vertex/Index buffer handling a bit. --- Source/Core/VideoCommon/Src/VertexLoader.cpp | 23 ++----- .../VideoCommon/Src/VertexManagerBase.cpp | 66 +++++++++++-------- .../Core/VideoCommon/Src/VertexManagerBase.h | 35 ++++------ .../Plugin_VideoDX11/Src/VertexManager.cpp | 6 +- .../Plugin_VideoDX9/Src/VertexManager.cpp | 12 ++-- .../Plugin_VideoOGL/Src/VertexManager.cpp | 6 +- 6 files changed, 69 insertions(+), 79 deletions(-) diff --git a/Source/Core/VideoCommon/Src/VertexLoader.cpp b/Source/Core/VideoCommon/Src/VertexLoader.cpp index 7d1b9d99a5..ca128b4f86 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader.cpp @@ -517,7 +517,7 @@ void VertexLoader::WriteSetVariable(int bits, void *address, OpArg value) #endif } -void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count) +void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int const count) { m_numLoadedVertices += count; @@ -560,21 +560,11 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count) for (int i = 0; i < 2; i++) colElements[i] = m_VtxAttr.color[i].Elements; - if (VertexManager::GetRemainingSize() < count * native_stride) - { - VertexManager::Flush(); - - if (VertexManager::GetRemainingSize() < count * native_stride) - ERROR_LOG(VIDEO, "VertexManager: Buffer not large enough for all vertices! 
" - "Increase MAXVBUFFERSIZE or we need primitive breaking afterall."); - } - + VertexManager::PrepareForAdditionalData(primitive, count, native_stride); ConvertVertices(count); VertexManager::AddVertices(primitive, count); - //VertexManager::Flush(); } - void VertexLoader::ConvertVertices ( int count ) { #ifdef USE_JIT @@ -598,7 +588,7 @@ void VertexLoader::ConvertVertices ( int count ) -void VertexLoader::RunCompiledVertices(int vtx_attr_group, int primitive, int count, u8* Data) +void VertexLoader::RunCompiledVertices(int vtx_attr_group, int primitive, int const count, u8* Data) { m_numLoadedVertices += count; @@ -641,16 +631,15 @@ void VertexLoader::RunCompiledVertices(int vtx_attr_group, int primitive, int co for (int i = 0; i < 2; i++) colElements[i] = m_VtxAttr.color[i].Elements; - if(VertexManager::GetRemainingSize() < native_stride * count) - VertexManager::Flush(); + VertexManager::PrepareForAdditionalData(primitive, count, native_stride); + memcpy_gc(VertexManager::s_pCurBufferPointer, Data, native_stride * count); VertexManager::s_pCurBufferPointer += native_stride * count; DataSkip(count * m_VertexSize); + VertexManager::AddVertices(primitive, count); } - - void VertexLoader::SetVAT(u32 _group0, u32 _group1, u32 _group2) { VAT vat; diff --git a/Source/Core/VideoCommon/Src/VertexManagerBase.cpp b/Source/Core/VideoCommon/Src/VertexManagerBase.cpp index 131d6de42a..bc1d51b69e 100644 --- a/Source/Core/VideoCommon/Src/VertexManagerBase.cpp +++ b/Source/Core/VideoCommon/Src/VertexManagerBase.cpp @@ -23,38 +23,43 @@ u8 *VertexManager::s_pEndBufferPointer; VertexManager::VertexManager() { - LocalVBuffer = new u8[MAXVBUFFERSIZE]; - s_pCurBufferPointer = s_pBaseBufferPointer = LocalVBuffer; - s_pEndBufferPointer = s_pBaseBufferPointer + MAXVBUFFERSIZE; + LocalVBuffer.resize(MAXVBUFFERSIZE); + s_pCurBufferPointer = s_pBaseBufferPointer = &LocalVBuffer[0]; + s_pEndBufferPointer = s_pBaseBufferPointer + LocalVBuffer.size(); - TIBuffer = new u16[MAXIBUFFERSIZE]; - 
LIBuffer = new u16[MAXIBUFFERSIZE]; - PIBuffer = new u16[MAXIBUFFERSIZE]; + TIBuffer.resize(MAXIBUFFERSIZE); + LIBuffer.resize(MAXIBUFFERSIZE); + PIBuffer.resize(MAXIBUFFERSIZE); ResetBuffer(); } VertexManager::~VertexManager() -{ - delete[] LocalVBuffer; - - delete[] TIBuffer; - delete[] LIBuffer; - delete[] PIBuffer; - - // TODO: necessary?? - ResetBuffer(); -} +{} void VertexManager::ResetBuffer() { s_pCurBufferPointer = s_pBaseBufferPointer; - IndexGenerator::Start(TIBuffer, LIBuffer, PIBuffer); + IndexGenerator::Start(GetTriangleIndexBuffer(), GetLineIndexBuffer(), GetPointIndexBuffer()); } -int VertexManager::GetRemainingSize() +u32 VertexManager::GetRemainingSize() { - return (int)(s_pEndBufferPointer - s_pCurBufferPointer); + return (u32)(s_pEndBufferPointer - s_pCurBufferPointer); +} + +void VertexManager::PrepareForAdditionalData(int primitive, u32 count, u32 stride) +{ + u32 const needed_vertex_bytes = count * stride; + + if (needed_vertex_bytes > GetRemainingSize() || count > GetRemainingIndices(primitive)) + { + Flush(); + + if (needed_vertex_bytes > GetRemainingSize()) + ERROR_LOG(VIDEO, "VertexManager: Buffer not large enough for all vertices! 
" + "Increase MAXVBUFFERSIZE or we need primitive breaking afterall."); + } } bool VertexManager::IsFlushed() const @@ -62,10 +67,7 @@ bool VertexManager::IsFlushed() const return s_pBaseBufferPointer == s_pCurBufferPointer; } -// Not used anywhere -// TODO: use this -#if 0 -int VertexManager::GetRemainingVertices(int primitive) +u32 VertexManager::GetRemainingIndices(int primitive) { switch (primitive) { @@ -90,7 +92,6 @@ int VertexManager::GetRemainingVertices(int primitive) break; } } -#endif void VertexManager::AddVertices(int primitive, u32 numVertices) { @@ -252,9 +253,16 @@ void VertexManager::DoState(PointerWrap& p) void VertexManager::DoStateShared(PointerWrap& p) { - p.DoPointer(s_pCurBufferPointer, g_vertex_manager->LocalVBuffer); - p.DoArray(LocalVBuffer, MAXVBUFFERSIZE); - p.DoArray(g_vertex_manager->TIBuffer, MAXIBUFFERSIZE); - p.DoArray(g_vertex_manager->LIBuffer, MAXIBUFFERSIZE); - p.DoArray(g_vertex_manager->PIBuffer, MAXIBUFFERSIZE); + // It seems we half-assume to be flushed here + // We update s_pCurBufferPointer yet don't worry about IndexGenerator's outdated pointers + // and maybe other things are overlooked + + p.Do(LocalVBuffer); + p.Do(TIBuffer); + p.Do(LIBuffer); + p.Do(PIBuffer); + + s_pBaseBufferPointer = &LocalVBuffer[0]; + s_pEndBufferPointer = s_pBaseBufferPointer + LocalVBuffer.size(); + p.DoPointer(s_pCurBufferPointer, s_pBaseBufferPointer); } diff --git a/Source/Core/VideoCommon/Src/VertexManagerBase.h b/Source/Core/VideoCommon/Src/VertexManagerBase.h index 4bf5541cbd..2690ffc03e 100644 --- a/Source/Core/VideoCommon/Src/VertexManagerBase.h +++ b/Source/Core/VideoCommon/Src/VertexManagerBase.h @@ -2,6 +2,8 @@ #ifndef _VERTEXMANAGERBASE_H #define _VERTEXMANAGERBASE_H +#include + class NativeVertexFormat; class PointerWrap; @@ -15,16 +17,10 @@ private: static const u32 MAX_PRIMITIVES_PER_COMMAND = (u16)-1; public: - // values from OGL backend - //static const u32 MAXVBUFFERSIZE = 0x1FFFF; - - // values from DX9/11 backend static 
const u32 MAXVBUFFERSIZE = MAX_PRIMITIVES_PER_COMMAND * LARGEST_POSSIBLE_VERTEX; // We may convert triangle-fans to triangle-lists, almost 3x as many indices. - // Watching for a full index buffer would probably be smarter than this calculation. - static const u32 MAXIBUFFERSIZE = MAXVBUFFERSIZE * 3 / SMALLEST_POSSIBLE_VERTEX; - //static const u32 MAXIBUFFERSIZE = MAX_PRIMITIVES_PER_COMMAND * 3; + static const u32 MAXIBUFFERSIZE = MAX_PRIMITIVES_PER_COMMAND * 3; VertexManager(); // needs to be virtual for DX11's dtor @@ -36,29 +32,23 @@ public: static u8 *s_pBaseBufferPointer; static u8 *s_pEndBufferPointer; - static int GetRemainingSize(); - - //int GetRemainingVertices(int primitive); + static u32 GetRemainingSize(); + static void PrepareForAdditionalData(int primitive, u32 count, u32 stride); + static u32 GetRemainingIndices(int primitive); static void Flush(); virtual ::NativeVertexFormat* CreateNativeVertexFormat() = 0; - // TODO: use these instead of TIBuffer, etc - -// u16* GetTriangleIndexBuffer() { return TIBuffer; } -// u16* GetLineIndexBuffer() { return LIBuffer; } -// u16* GetPointIndexBuffer() { return PIBuffer; } -// u8* GetVertexBuffer() { return s_pBaseBufferPointer; } - static void DoState(PointerWrap& p); virtual void CreateDeviceObjects(){}; virtual void DestroyDeviceObjects(){}; protected: - u16* TIBuffer; - u16* LIBuffer; - u16* PIBuffer; + u16* GetTriangleIndexBuffer() { return &TIBuffer[0]; } + u16* GetLineIndexBuffer() { return &LIBuffer[0]; } + u16* GetPointIndexBuffer() { return &PIBuffer[0]; } + u8* GetVertexBuffer() { return &s_pBaseBufferPointer[0]; } virtual void vDoState(PointerWrap& p) { DoStateShared(p); } void DoStateShared(PointerWrap& p); @@ -72,7 +62,10 @@ private: // temp virtual void vFlush() = 0; - u8* LocalVBuffer; + std::vector LocalVBuffer; + std::vector TIBuffer; + std::vector LIBuffer; + std::vector PIBuffer; }; extern VertexManager *g_vertex_manager; diff --git a/Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp 
b/Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp index 573180e0b7..b7c8392b42 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp +++ b/Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp @@ -136,9 +136,9 @@ void VertexManager::LoadBuffers() m_triangleDrawIndex = m_indexBufferCursor; m_lineDrawIndex = m_triangleDrawIndex + IndexGenerator::GetTriangleindexLen(); m_pointDrawIndex = m_lineDrawIndex + IndexGenerator::GetLineindexLen(); - memcpy((u16*)map.pData + m_triangleDrawIndex, TIBuffer, sizeof(u16) * IndexGenerator::GetTriangleindexLen()); - memcpy((u16*)map.pData + m_lineDrawIndex, LIBuffer, sizeof(u16) * IndexGenerator::GetLineindexLen()); - memcpy((u16*)map.pData + m_pointDrawIndex, PIBuffer, sizeof(u16) * IndexGenerator::GetPointindexLen()); + memcpy((u16*)map.pData + m_triangleDrawIndex, GetTriangleIndexBuffer(), sizeof(u16) * IndexGenerator::GetTriangleindexLen()); + memcpy((u16*)map.pData + m_lineDrawIndex, GetLineIndexBuffer(), sizeof(u16) * IndexGenerator::GetLineindexLen()); + memcpy((u16*)map.pData + m_pointDrawIndex, GetPointIndexBuffer(), sizeof(u16) * IndexGenerator::GetPointindexLen()); D3D::context->Unmap(m_indexBuffers[m_activeIndexBuffer], 0); m_indexBufferCursor += iCount; } diff --git a/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp b/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp index 7ef3052a96..7e1f4dfae2 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp @@ -192,17 +192,17 @@ void VertexManager::PrepareVBuffers(int stride) } if(TdataSize) { - memcpy(pIndices, TIBuffer, TdataSize * sizeof(u16)); + memcpy(pIndices, GetTriangleIndexBuffer(), TdataSize * sizeof(u16)); pIndices += TdataSize; } if(LDataSize) { - memcpy(pIndices, LIBuffer, LDataSize * sizeof(u16)); + memcpy(pIndices, GetLineIndexBuffer(), LDataSize * sizeof(u16)); pIndices += LDataSize; } if(PDataSize) { - memcpy(pIndices, PIBuffer, PDataSize * sizeof(u16)); + 
memcpy(pIndices, GetPointIndexBuffer(), PDataSize * sizeof(u16)); } IBuffers[CurrentIBuffer]->Unlock(); D3D::dev->SetStreamSource( 0, VBuffers[CurrentVBuffer], CurrentVBufferIndex, stride); @@ -266,7 +266,7 @@ void VertexManager::DrawVA(int stride) if (FAILED(D3D::dev->DrawIndexedPrimitiveUP( D3DPT_TRIANGLELIST, 0, IndexGenerator::GetNumVerts(), IndexGenerator::GetNumTriangles(), - TIBuffer, + GetTriangleIndexBuffer(), D3DFMT_INDEX16, s_pBaseBufferPointer, stride))) @@ -280,7 +280,7 @@ void VertexManager::DrawVA(int stride) if (FAILED(D3D::dev->DrawIndexedPrimitiveUP( D3DPT_LINELIST, 0, IndexGenerator::GetNumVerts(), IndexGenerator::GetNumLines(), - LIBuffer, + GetLineIndexBuffer(), D3DFMT_INDEX16, s_pBaseBufferPointer, stride))) @@ -294,7 +294,7 @@ void VertexManager::DrawVA(int stride) if (FAILED(D3D::dev->DrawIndexedPrimitiveUP( D3DPT_POINTLIST, 0, IndexGenerator::GetNumVerts(), IndexGenerator::GetNumPoints(), - PIBuffer, + GetPointIndexBuffer(), D3DFMT_INDEX16, s_pBaseBufferPointer, stride))) diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp b/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp index 5777f225cd..bbd48f90d9 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp @@ -83,17 +83,17 @@ void VertexManager::Draw() { if (IndexGenerator::GetNumTriangles() > 0) { - glDrawElements(GL_TRIANGLES, IndexGenerator::GetTriangleindexLen(), GL_UNSIGNED_SHORT, TIBuffer); + glDrawElements(GL_TRIANGLES, IndexGenerator::GetTriangleindexLen(), GL_UNSIGNED_SHORT, GetTriangleIndexBuffer()); INCSTAT(stats.thisFrame.numIndexedDrawCalls); } if (IndexGenerator::GetNumLines() > 0) { - glDrawElements(GL_LINES, IndexGenerator::GetLineindexLen(), GL_UNSIGNED_SHORT, LIBuffer); + glDrawElements(GL_LINES, IndexGenerator::GetLineindexLen(), GL_UNSIGNED_SHORT, GetLineIndexBuffer()); INCSTAT(stats.thisFrame.numIndexedDrawCalls); } if (IndexGenerator::GetNumPoints() > 0) { - glDrawElements(GL_POINTS, 
IndexGenerator::GetPointindexLen(), GL_UNSIGNED_SHORT, PIBuffer); + glDrawElements(GL_POINTS, IndexGenerator::GetPointindexLen(), GL_UNSIGNED_SHORT, GetPointIndexBuffer()); INCSTAT(stats.thisFrame.numIndexedDrawCalls); } } From bf58c70e9b5a16c56113fe5114a845f8000d4a9d Mon Sep 17 00:00:00 2001 From: Jordan Woyak Date: Mon, 4 Mar 2013 15:24:39 -0600 Subject: [PATCH 25/26] Move copy-pasted code into function. --- Source/Core/VideoCommon/Src/VertexLoader.cpp | 69 +++++--------------- Source/Core/VideoCommon/Src/VertexLoader.h | 2 + 2 files changed, 18 insertions(+), 53 deletions(-) diff --git a/Source/Core/VideoCommon/Src/VertexLoader.cpp b/Source/Core/VideoCommon/Src/VertexLoader.cpp index ca128b4f86..bbee1c0411 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader.cpp @@ -517,7 +517,7 @@ void VertexLoader::WriteSetVariable(int bits, void *address, OpArg value) #endif } -void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int const count) +int VertexLoader::SetupRunVertices(int vtx_attr_group, int primitive, int const count) { m_numLoadedVertices += count; @@ -536,7 +536,7 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int const coun { // if cull mode is none, ignore triangles and quads DataSkip(count * m_VertexSize); - return; + return 0; } m_NativeFmt->EnableComponents(m_NativeFmt->m_components); @@ -561,8 +561,15 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int const coun colElements[i] = m_VtxAttr.color[i].Elements; VertexManager::PrepareForAdditionalData(primitive, count, native_stride); - ConvertVertices(count); - VertexManager::AddVertices(primitive, count); + + return count; +} + +void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int const count) +{ + auto const new_count = SetupRunVertices(vtx_attr_group, primitive, count); + ConvertVertices(new_count); + VertexManager::AddVertices(primitive, new_count); } void 
VertexLoader::ConvertVertices ( int count ) @@ -585,59 +592,15 @@ void VertexLoader::ConvertVertices ( int count ) #endif } - - - void VertexLoader::RunCompiledVertices(int vtx_attr_group, int primitive, int const count, u8* Data) { - m_numLoadedVertices += count; - - // Flush if our vertex format is different from the currently set. - if (g_nativeVertexFmt != NULL && g_nativeVertexFmt != m_NativeFmt) - { - // We really must flush here. It's possible that the native representations - // of the two vtx formats are the same, but we have no way to easily check that - // now. - VertexManager::Flush(); - // Also move the Set() here? - } - g_nativeVertexFmt = m_NativeFmt; - - if (bpmem.genMode.cullmode == 3 && primitive < 5) - { - // if cull mode is none, ignore triangles and quads - DataSkip(count * m_VertexSize); - return; - } - - m_NativeFmt->EnableComponents(m_NativeFmt->m_components); - - // Load position and texcoord scale factors. - m_VtxAttr.PosFrac = g_VtxAttr[vtx_attr_group].g0.PosFrac; - m_VtxAttr.texCoord[0].Frac = g_VtxAttr[vtx_attr_group].g0.Tex0Frac; - m_VtxAttr.texCoord[1].Frac = g_VtxAttr[vtx_attr_group].g1.Tex1Frac; - m_VtxAttr.texCoord[2].Frac = g_VtxAttr[vtx_attr_group].g1.Tex2Frac; - m_VtxAttr.texCoord[3].Frac = g_VtxAttr[vtx_attr_group].g1.Tex3Frac; - m_VtxAttr.texCoord[4].Frac = g_VtxAttr[vtx_attr_group].g2.Tex4Frac; - m_VtxAttr.texCoord[5].Frac = g_VtxAttr[vtx_attr_group].g2.Tex5Frac; - m_VtxAttr.texCoord[6].Frac = g_VtxAttr[vtx_attr_group].g2.Tex6Frac; - m_VtxAttr.texCoord[7].Frac = g_VtxAttr[vtx_attr_group].g2.Tex7Frac; - - pVtxAttr = &m_VtxAttr; - posScale = fractionTable[m_VtxAttr.PosFrac]; - if (m_NativeFmt->m_components & VB_HAS_UVALL) - for (int i = 0; i < 8; i++) - tcScale[i] = fractionTable[m_VtxAttr.texCoord[i].Frac]; - for (int i = 0; i < 2; i++) - colElements[i] = m_VtxAttr.color[i].Elements; - - VertexManager::PrepareForAdditionalData(primitive, count, native_stride); + auto const new_count = SetupRunVertices(vtx_attr_group, 
primitive, count); - memcpy_gc(VertexManager::s_pCurBufferPointer, Data, native_stride * count); - VertexManager::s_pCurBufferPointer += native_stride * count; - DataSkip(count * m_VertexSize); + memcpy_gc(VertexManager::s_pCurBufferPointer, Data, native_stride * new_count); + VertexManager::s_pCurBufferPointer += native_stride * new_count; + DataSkip(new_count * m_VertexSize); - VertexManager::AddVertices(primitive, count); + VertexManager::AddVertices(primitive, new_count); } void VertexLoader::SetVAT(u32 _group0, u32 _group1, u32 _group2) diff --git a/Source/Core/VideoCommon/Src/VertexLoader.h b/Source/Core/VideoCommon/Src/VertexLoader.h index 4f4fc19e99..b7afbe4ba6 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader.h +++ b/Source/Core/VideoCommon/Src/VertexLoader.h @@ -83,6 +83,8 @@ public: ~VertexLoader(); int GetVertexSize() const {return m_VertexSize;} + + int SetupRunVertices(int vtx_attr_group, int primitive, int const count); void RunVertices(int vtx_attr_group, int primitive, int count); void RunCompiledVertices(int vtx_attr_group, int primitive, int count, u8* Data); From 10983b0eae0124b89d777b441d9916257a50f200 Mon Sep 17 00:00:00 2001 From: degasus Date: Wed, 6 Mar 2013 12:33:02 +0100 Subject: [PATCH 26/26] fix VertexManager::GetRemainingIndices the old implementation returns the amound of primitives fit into index buffers but also gc needs more than one index per primitve --- Source/Core/VideoCommon/Src/VertexManagerBase.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/Source/Core/VideoCommon/Src/VertexManagerBase.cpp b/Source/Core/VideoCommon/Src/VertexManagerBase.cpp index 55f55954c5..54830bcbea 100644 --- a/Source/Core/VideoCommon/Src/VertexManagerBase.cpp +++ b/Source/Core/VideoCommon/Src/VertexManagerBase.cpp @@ -59,6 +59,9 @@ void VertexManager::PrepareForAdditionalData(int primitive, u32 count, u32 strid if (needed_vertex_bytes > GetRemainingSize()) ERROR_LOG(VIDEO, "VertexManager: Buffer not large 
enough for all vertices! " "Increase MAXVBUFFERSIZE or we need primitive breaking afterall."); + if (count > GetRemainingIndices(primitive)) + ERROR_LOG(VIDEO, "VertexManager: Buffer not large enough for all indices! " + "Increase MAXIBUFFERSIZE or we need primitive breaking afterall."); } } @@ -72,24 +75,24 @@ u32 VertexManager::GetRemainingIndices(int primitive) switch (primitive) { case GX_DRAW_QUADS: + return (MAXIBUFFERSIZE - IndexGenerator::GetTriangleindexLen()) / 6 * 4; case GX_DRAW_TRIANGLES: + return (MAXIBUFFERSIZE - IndexGenerator::GetTriangleindexLen()); case GX_DRAW_TRIANGLE_STRIP: + return (MAXIBUFFERSIZE - IndexGenerator::GetTriangleindexLen()) / 3 + 2; case GX_DRAW_TRIANGLE_FAN: - return (MAXIBUFFERSIZE - IndexGenerator::GetTriangleindexLen()) / 3; - break; + return (MAXIBUFFERSIZE - IndexGenerator::GetTriangleindexLen()) / 3 + 2; case GX_DRAW_LINES: + return (MAXIBUFFERSIZE - IndexGenerator::GetLineindexLen()); case GX_DRAW_LINE_STRIP: - return (MAXIBUFFERSIZE - IndexGenerator::GetLineindexLen()) / 2; - break; + return (MAXIBUFFERSIZE - IndexGenerator::GetLineindexLen()) / 2 + 1; case GX_DRAW_POINTS: return (MAXIBUFFERSIZE - IndexGenerator::GetPointindexLen()); - break; default: return 0; - break; } }