diff --git a/Source/Core/Common/Src/CommonFuncs.h b/Source/Core/Common/Src/CommonFuncs.h index 7c6bcdc703..19223a74a7 100644 --- a/Source/Core/Common/Src/CommonFuncs.h +++ b/Source/Core/Common/Src/CommonFuncs.h @@ -172,6 +172,41 @@ inline u64 swap64(u64 data) {return ((u64)swap32(data) << 32) | swap32(data >> 3 inline u16 swap16(const u8* _pData) {return swap16(*(const u16*)_pData);} inline u32 swap32(const u8* _pData) {return swap32(*(const u32*)_pData);} inline u64 swap64(const u8* _pData) {return swap64(*(const u64*)_pData);} + +template +void swap(u8*); + +template <> +inline void swap<1>(u8* data) +{} + +template <> +inline void swap<2>(u8* data) +{ + *reinterpret_cast(data) = swap16(data); +} + +template <> +inline void swap<4>(u8* data) +{ + *reinterpret_cast(data) = swap32(data); +} + +template <> +inline void swap<8>(u8* data) +{ + *reinterpret_cast(data) = swap64(data); +} + +template +inline T FromBigEndian(T data) +{ + //static_assert(std::is_arithmetic::value, "function only makes sense with arithmetic types"); + + swap(reinterpret_cast(&data)); + return data; +} + } // Namespace Common #endif // _COMMONFUNCS_H_ diff --git a/Source/Core/VideoCommon/Src/DataReader.h b/Source/Core/VideoCommon/Src/DataReader.h index 06668f8bbc..03061229c0 100644 --- a/Source/Core/VideoCommon/Src/DataReader.h +++ b/Source/Core/VideoCommon/Src/DataReader.h @@ -20,6 +20,8 @@ #ifndef _DATAREADER_H #define _DATAREADER_H +#include "VertexManagerBase.h" + extern u8* g_pVideoData; #if _M_SSE >= 0x301 && !(defined __GNUC__ && !defined __SSSE3__) @@ -31,43 +33,63 @@ __forceinline void DataSkip(u32 skip) g_pVideoData += skip; } +// probably unnecessary +template +__forceinline void DataSkip() +{ + g_pVideoData += count; +} + +template +__forceinline T DataPeek(int _uOffset) +{ + auto const result = Common::FromBigEndian(*reinterpret_cast(g_pVideoData + _uOffset)); + return result; +} + +// TODO: kill these __forceinline u8 DataPeek8(int _uOffset) { - return g_pVideoData[_uOffset]; + return DataPeek(_uOffset); } __forceinline u16 DataPeek16(int _uOffset) { - return Common::swap16(*(u16*)&g_pVideoData[_uOffset]); + return DataPeek(_uOffset); } __forceinline u32 DataPeek32(int _uOffset) { - return Common::swap32(*(u32*)&g_pVideoData[_uOffset]); + return DataPeek(_uOffset); } +template +__forceinline T DataRead() +{ + auto const result = DataPeek(0); + DataSkip(); + return result; +} + +// TODO: kill these __forceinline u8 DataReadU8() { - return *g_pVideoData++; + return DataRead(); } __forceinline s8 DataReadS8() { - return (s8)(*g_pVideoData++); + return DataRead(); } __forceinline u16 DataReadU16() { - u16 tmp = Common::swap16(*(u16*)g_pVideoData); - g_pVideoData += 2; - return tmp; + return DataRead(); } __forceinline u32 DataReadU32() { - u32 tmp = Common::swap32(*(u32*)g_pVideoData); - g_pVideoData += 4; - return tmp; + return DataRead(); } typedef void (*DataReadU32xNfunc)(u32 *buf); @@ -120,58 +142,16 @@ __forceinline u32 DataReadU32Unswapped() return tmp; } -template -__forceinline T DataRead() -{ - T tmp = *(T*)g_pVideoData; - g_pVideoData += sizeof(T); - return tmp; -} - -template <> -__forceinline u16 DataRead() -{ - u16 tmp = Common::swap16(*(u16*)g_pVideoData); - g_pVideoData += 2; - return tmp; -} - -template <> -__forceinline s16 DataRead() -{ - s16 tmp = (s16)Common::swap16(*(u16*)g_pVideoData); - g_pVideoData += 2; - return tmp; -} - -template <> -__forceinline u32 DataRead() -{ - u32 tmp = (u32)Common::swap32(*(u32*)g_pVideoData); - g_pVideoData += 4; - return tmp; -} - -template <> -__forceinline s32 DataRead() -{ - s32 tmp = (s32)Common::swap32(*(u32*)g_pVideoData); - g_pVideoData += 4; - return tmp; -} - -__forceinline float DataReadF32() -{ - union {u32 i; float f;} temp; - temp.i = Common::swap32(*(u32*)g_pVideoData); - g_pVideoData += 4; - float tmp = temp.f; - return tmp; -} - __forceinline u8* DataGetPosition() { return g_pVideoData; } +template +__forceinline void DataWrite(T data) +{ + *(T*)VertexManager::s_pCurBufferPointer = data; + VertexManager::s_pCurBufferPointer += sizeof(T); +} + #endif diff --git a/Source/Core/VideoCommon/Src/IndexGenerator.cpp b/Source/Core/VideoCommon/Src/IndexGenerator.cpp index 8053114fae..f2d23a7fce 100644 --- a/Source/Core/VideoCommon/Src/IndexGenerator.cpp +++ b/Source/Core/VideoCommon/Src/IndexGenerator.cpp @@ -15,6 +15,9 @@ // Official SVN repository and contact information can be found at // http://code.google.com/p/dolphin-emu/ +#include + +#include "Common.h" #include "IndexGenerator.h" /* @@ -27,24 +30,18 @@ QUAD simulator */ //Init -u16 *IndexGenerator::Tptr = 0; -u16 *IndexGenerator::BASETptr = 0; -u16 *IndexGenerator::Lptr = 0; -u16 *IndexGenerator::BASELptr = 0; -u16 *IndexGenerator::Pptr = 0; -u16 *IndexGenerator::BASEPptr = 0; -int IndexGenerator::numT = 0; -int IndexGenerator::numL = 0; -int IndexGenerator::numP = 0; -int IndexGenerator::index = 0; -int IndexGenerator::Tadds = 0; -int IndexGenerator::Ladds = 0; -int IndexGenerator::Padds = 0; -IndexGenerator::IndexPrimitiveType IndexGenerator::LastTPrimitive = Prim_None; -IndexGenerator::IndexPrimitiveType IndexGenerator::LastLPrimitive = Prim_None; -bool IndexGenerator::used = false; +u16 *IndexGenerator::Tptr; +u16 *IndexGenerator::BASETptr; +u16 *IndexGenerator::Lptr; +u16 *IndexGenerator::BASELptr; +u16 *IndexGenerator::Pptr; +u16 *IndexGenerator::BASEPptr; +u32 IndexGenerator::numT; +u32 IndexGenerator::numL; +u32 IndexGenerator::numP; +u32 IndexGenerator::index; -void IndexGenerator::Start(u16 *Triangleptr,u16 *Lineptr,u16 *Pointptr) +void IndexGenerator::Start(u16* Triangleptr, u16* Lineptr, u16* Pointptr) { Tptr = Triangleptr; Lptr = Lineptr; @@ -56,288 +53,116 @@ void IndexGenerator::Start(u16 *Triangleptr,u16 *Lineptr,u16 *Pointptr) numT = 0; numL = 0; numP = 0; - Tadds = 0; - Ladds = 0; - Padds = 0; - LastTPrimitive = Prim_None; - LastLPrimitive = Prim_None; } + +void IndexGenerator::AddIndices(int primitive, u32 numVerts) +{ + //switch (primitive) + //{ + //case GX_DRAW_QUADS: IndexGenerator::AddQuads(numVerts); break; + //case GX_DRAW_TRIANGLES: IndexGenerator::AddList(numVerts); break; + //case GX_DRAW_TRIANGLE_STRIP: IndexGenerator::AddStrip(numVerts); break; + //case GX_DRAW_TRIANGLE_FAN: IndexGenerator::AddFan(numVerts); break; + //case GX_DRAW_LINES: IndexGenerator::AddLineList(numVerts); break; + //case GX_DRAW_LINE_STRIP: IndexGenerator::AddLineStrip(numVerts); break; + //case GX_DRAW_POINTS: IndexGenerator::AddPoints(numVerts); break; + //} + + static void (*const primitive_table[])(u32) = + { + IndexGenerator::AddQuads, + NULL, + IndexGenerator::AddList, + IndexGenerator::AddStrip, + IndexGenerator::AddFan, + IndexGenerator::AddLineList, + IndexGenerator::AddLineStrip, + IndexGenerator::AddPoints, + }; + + primitive_table[primitive](numVerts); + index += numVerts; +} + // Triangles -void IndexGenerator::AddList(int numVerts) +__forceinline void IndexGenerator::WriteTriangle(u32 index1, u32 index2, u32 index3) { - //if we have no vertices return - if(numVerts <= 0) return; - int numTris = numVerts / 3; - if (!numTris) - { - //if we have less than 3 verts - if(numVerts == 1) - { - // discard - index++; - return; - } - else - { - //we have two verts render a degenerated triangle - numTris = 1; - *Tptr++ = index; - *Tptr++ = index+1; - *Tptr++ = index; - } - } - else - { - for (int i = 0; i < numTris; i++) - { - *Tptr++ = index+i*3; - *Tptr++ = index+i*3+1; - *Tptr++ = index+i*3+2; - } - int baseRemainingverts = numVerts - numVerts % 3; - switch (numVerts % 3) - { - case 2: - //whe have 2 remaining verts use strip method - *Tptr++ = index + baseRemainingverts - 1; - *Tptr++ = index + baseRemainingverts; - *Tptr++ = index + baseRemainingverts + 1; - numTris++; - break; - case 1: - //whe have 1 remaining verts use strip method this is only a conjeture - *Tptr++ = index + baseRemainingverts - 2; - *Tptr++ = index + baseRemainingverts - 1; - *Tptr++ = index + baseRemainingverts; - numTris++; - break; - default: - break; - }; - } - index += numVerts; - numT += numTris; - Tadds++; - LastTPrimitive = Prim_List; + *Tptr++ = index1; + *Tptr++ = index2; + *Tptr++ = index3; + + ++numT; } -void IndexGenerator::AddStrip(int numVerts) +void IndexGenerator::AddList(u32 const numVerts) { - if(numVerts <= 0) return; - int numTris = numVerts - 2; - if (numTris < 1) + auto const numTris = numVerts / 3; + for (u32 i = 0; i != numTris; ++i) { - //if we have less than 3 verts - if(numVerts == 1) - { - // discard - index++; - return; - } - else - { - //we have two verts render a degenerated triangle - numTris = 1; - *Tptr++ = index; - *Tptr++ = index+1; - *Tptr++ = index; - } + WriteTriangle(index + i * 3, index + i * 3 + 1, index + i * 3 + 2); } - else - { - bool wind = false; - for (int i = 0; i < numTris; i++) - { - *Tptr++ = index+i; - *Tptr++ = index+i+(wind?2:1); - *Tptr++ = index+i+(wind?1:2); - wind = !wind; - } - } - index += numVerts; - numT += numTris; - Tadds++; - LastTPrimitive = Prim_Strip; -} -void IndexGenerator::AddFan(int numVerts) -{ - if(numVerts <= 0) return; - int numTris = numVerts - 2; - if (numTris < 1) - { - //if we have less than 3 verts - if(numVerts == 1) - { - //Discard - index++; - return; - } - else - { - //we have two verts render a degenerated triangle - numTris = 1; - *Tptr++ = index; - *Tptr++ = index+1; - *Tptr++ = index; - } - } - else - { - for (int i = 0; i < numTris; i++) - { - *Tptr++ = index; - *Tptr++ = index+i+1; - *Tptr++ = index+i+2; - } - } - index += numVerts; - numT += numTris; - Tadds++; - LastTPrimitive = Prim_Fan; } -void IndexGenerator::AddQuads(int numVerts) +void IndexGenerator::AddStrip(u32 const numVerts) { - if(numVerts <= 0) return; - int numTris = (numVerts/4)*2; - if (numTris == 0) + bool wind = false; + for (u32 i = 2; i < numVerts; ++i) { - //if we have less than 3 verts - if(numVerts == 1) - { - //discard - index++; - return; - } - else - { - if(numVerts == 2) - { - //we have two verts render a degenerated triangle - numTris = 1; - *Tptr++ = index; - *Tptr++ = index + 1; - *Tptr++ = index; - } - else - { - //we have 3 verts render a full triangle - numTris = 1; - *Tptr++ = index; - *Tptr++ = index + 1; - *Tptr++ = index + 2; - } - } + WriteTriangle( + index + i - 2, + index + i - !wind, + index + i - wind); + + wind ^= true; } - else - { - for (int i = 0; i < numTris / 2; i++) - { - *Tptr++ = index+i*4; - *Tptr++ = index+i*4+1; - *Tptr++ = index+i*4+2; - *Tptr++ = index+i*4; - *Tptr++ = index+i*4+2; - *Tptr++ = index+i*4+3; - } - int baseRemainingverts = numVerts - numVerts % 4; - switch (numVerts % 4) - { - case 3: - //whe have 3 remaining verts use strip method - *Tptr++ = index + baseRemainingverts; - *Tptr++ = index + baseRemainingverts + 1; - *Tptr++ = index + baseRemainingverts + 2; - numTris++; - break; - case 2: - //whe have 2 remaining verts use strip method - *Tptr++ = index + baseRemainingverts - 1; - *Tptr++ = index + baseRemainingverts; - *Tptr++ = index + baseRemainingverts + 1; - numTris++; - break; - case 1: - //whe have 1 remaining verts use strip method this is only a conjeture - *Tptr++ = index + baseRemainingverts - 2; - *Tptr++ = index + baseRemainingverts - 1; - *Tptr++ = index + baseRemainingverts; - numTris++; - break; - default: - break; - }; - } - index += numVerts; - numT += numTris; - Tadds++; - LastTPrimitive = Prim_List; } - -//Lines -void IndexGenerator::AddLineList(int numVerts) +void IndexGenerator::AddFan(u32 numVerts) { - if(numVerts <= 0) return; - int numLines = numVerts / 2; - if (!numLines) + for (u32 i = 2; i < numVerts; ++i) { - //Discard - index++; - return; + WriteTriangle(index, index + i - 1, index + i); } - else - { - for (int i = 0; i < numLines; i++) - { - *Lptr++ = index+i*2; - *Lptr++ = index+i*2+1; - } - if((numVerts & 1) != 0) - { - //use line strip for remaining vert - *Lptr++ = index + numLines * 2 - 1; - *Lptr++ = index + numLines * 2; - } - } - index += numVerts; - numL += numLines; - Ladds++; - LastLPrimitive = Prim_List; } -void IndexGenerator::AddLineStrip(int numVerts) +void IndexGenerator::AddQuads(u32 numVerts) { - int numLines = numVerts - 1; - if (numLines <= 0) + auto const numQuads = numVerts / 4; + for (u32 i = 0; i != numQuads; ++i) { - if(numVerts == 1) - { - index++; - } - return; + WriteTriangle(index + i * 4, index + i * 4 + 1, index + i * 4 + 2); + WriteTriangle(index + i * 4, index + i * 4 + 2, index + i * 4 + 3); } - for (int i = 0; i < numLines; i++) - { - *Lptr++ = index+i; - *Lptr++ = index+i+1; - } - index += numVerts; - numL += numLines; - Ladds++; - LastLPrimitive = Prim_Strip; } - - -//Points -void IndexGenerator::AddPoints(int numVerts) +// Lines +void IndexGenerator::AddLineList(u32 numVerts) { - for (int i = 0; i < numVerts; i++) + auto const numLines = numVerts / 2; + for (u32 i = 0; i != numLines; ++i) { - *Pptr++ = index+i; + *Lptr++ = index + i * 2; + *Lptr++ = index + i * 2 + 1; + ++numL; + } +} + +void IndexGenerator::AddLineStrip(u32 numVerts) +{ + for (u32 i = 1; i < numVerts; ++i) + { + *Lptr++ = index + i - 1; + *Lptr++ = index + i; + ++numL; + } +} + +// Points +void IndexGenerator::AddPoints(u32 numVerts) +{ + for (u32 i = 0; i != numVerts; ++i) + { + *Pptr++ = index + i; + ++numP; } - index += numVerts; - numP += numVerts; - Padds++; } diff --git a/Source/Core/VideoCommon/Src/IndexGenerator.h b/Source/Core/VideoCommon/Src/IndexGenerator.h index d1ed143d98..400d252bf8 100644 --- a/Source/Core/VideoCommon/Src/IndexGenerator.h +++ b/Source/Core/VideoCommon/Src/IndexGenerator.h @@ -25,53 +25,58 @@ class IndexGenerator { public: - //Init + // Init static void Start(u16 *Triangleptr,u16 *Lineptr,u16 *Pointptr); - //Triangles - static void AddList(int numVerts); - static void AddStrip(int numVerts); - static void AddFan(int numVerts); - static void AddQuads(int numVerts); - //Lines - static void AddLineList(int numVerts); - static void AddLineStrip(int numVerts); - //Points - static void AddPoints(int numVerts); - //Interface - static int GetNumTriangles() {used = true; return numT;} - static int GetNumLines() {used = true;return numL;} - static int GetNumPoints() {used = true;return numP;} - static int GetNumVerts() {return index;} //returns numprimitives - static int GetNumAdds() {return Tadds + Ladds + Padds;} - static int GetTriangleindexLen() {return (int)(Tptr - BASETptr);} - static int GetLineindexLen() {return (int)(Lptr - BASELptr);} - static int GetPointindexLen() {return (int)(Pptr - BASEPptr);} - + + static void AddIndices(int primitive, u32 numVertices); + + // Interface + static u32 GetNumTriangles() {return numT;} + static u32 GetNumLines() {return numL;} + static u32 GetNumPoints() {return numP;} + + // returns numprimitives + static u32 GetNumVerts() {return index;} + + static u32 GetTriangleindexLen() {return (u32)(Tptr - BASETptr);} + static u32 GetLineindexLen() {return (u32)(Lptr - BASELptr);} + static u32 GetPointindexLen() {return (u32)(Pptr - BASEPptr);} +/* enum IndexPrimitiveType { Prim_None = 0, Prim_List, Prim_Strip, Prim_Fan - } ; + }; +*/ private: + // Triangles + static void AddList(u32 numVerts); + static void AddStrip(u32 numVerts); + static void AddFan(u32 numVerts); + static void AddQuads(u32 numVerts); + + // Lines + static void AddLineList(u32 numVerts); + static void AddLineStrip(u32 numVerts); + + // Points + static void AddPoints(u32 numVerts); + + static void WriteTriangle(u32 index1, u32 index2, u32 index3); + static u16 *Tptr; static u16 *BASETptr; static u16 *Lptr; static u16 *BASELptr; static u16 *Pptr; static u16 *BASEPptr; - static int numT; - static int numL; - static int numP; - static int index; - static int Tadds; - static int Ladds; - static int Padds; - static IndexPrimitiveType LastTPrimitive; - static IndexPrimitiveType LastLPrimitive; - static bool used; - + // TODO: redundant variables + static u32 numT; + static u32 numL; + static u32 numP; + static u32 index; }; #endif // _INDEXGENERATOR_H diff --git a/Source/Core/VideoCommon/Src/VertexLoader.cpp b/Source/Core/VideoCommon/Src/VertexLoader.cpp index 526cbe7e49..6c85324ba8 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader.cpp @@ -73,6 +73,10 @@ int colElements[2]; float posScale; float tcScale[8]; +// bbox must read vertex position, so convert it to this buffer +static float s_bbox_vertex_buffer[3]; +static u8 *s_bbox_pCurBufferPointer_orig; + static const float fractionTable[32] = { 1.0f / (1U << 0), 1.0f / (1U << 1), 1.0f / (1U << 2), 1.0f / (1U << 3), 1.0f / (1U << 4), 1.0f / (1U << 5), 1.0f / (1U << 6), 1.0f / (1U << 7), @@ -95,23 +99,38 @@ void LOADERDECL PosMtx_ReadDirect_UByte() void LOADERDECL PosMtx_Write() { - *VertexManager::s_pCurBufferPointer++ = s_curposmtx; - *VertexManager::s_pCurBufferPointer++ = 0; - *VertexManager::s_pCurBufferPointer++ = 0; - *VertexManager::s_pCurBufferPointer++ = 0; + DataWrite(s_curposmtx); + DataWrite(0); + DataWrite(0); + DataWrite(0); +} + +void LOADERDECL UpdateBoundingBoxPrepare() +{ + if (!PixelEngine::bbox_active) + return; + + // set our buffer as videodata buffer, so we will get a copy of the vertex positions + // this is a big hack, but so we can use the same converting function then without bbox + s_bbox_pCurBufferPointer_orig = VertexManager::s_pCurBufferPointer; + VertexManager::s_pCurBufferPointer = (u8*)s_bbox_vertex_buffer; } void LOADERDECL UpdateBoundingBox() { if (!PixelEngine::bbox_active) return; + + // reset videodata pointer + VertexManager::s_pCurBufferPointer = s_bbox_pCurBufferPointer_orig; + + // copy vertex pointers + memcpy(VertexManager::s_pCurBufferPointer, s_bbox_vertex_buffer, 12); + VertexManager::s_pCurBufferPointer += 12; - // Truly evil hack, reading backwards from the write pointer. If we were writing to write-only - // memory like we might have been with a D3D vertex buffer, this would have been a bad idea. - float *data = (float *)(VertexManager::s_pCurBufferPointer - 12); // We must transform the just loaded point by the current world and projection matrix - in software. // Then convert to screen space and update the bounding box. - float p[3] = {data[0], data[1], data[2]}; + float p[3] = {s_bbox_vertex_buffer[0], s_bbox_vertex_buffer[1], s_bbox_vertex_buffer[2]}; const float *world_matrix = (float*)xfmem + MatrixIndexA.PosNormalMtxIdx * 4; const float *proj_matrix = &g_fProjectionMatrix[0]; @@ -149,24 +168,22 @@ void LOADERDECL TexMtx_ReadDirect_UByte() void LOADERDECL TexMtx_Write_Float() { - *(float*)VertexManager::s_pCurBufferPointer = (float)s_curtexmtx[s_texmtxwrite++]; - VertexManager::s_pCurBufferPointer += 4; + DataWrite(float(s_curtexmtx[s_texmtxwrite++])); } void LOADERDECL TexMtx_Write_Float2() { - ((float*)VertexManager::s_pCurBufferPointer)[0] = 0; - ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)s_curtexmtx[s_texmtxwrite++]; - VertexManager::s_pCurBufferPointer += 8; + DataWrite(0.f); + DataWrite(float(s_curtexmtx[s_texmtxwrite++])); } void LOADERDECL TexMtx_Write_Float4() { - ((float*)VertexManager::s_pCurBufferPointer)[0] = 0; - ((float*)VertexManager::s_pCurBufferPointer)[1] = 0; - ((float*)VertexManager::s_pCurBufferPointer)[2] = s_curtexmtx[s_texmtxwrite++]; - ((float*)VertexManager::s_pCurBufferPointer)[3] = 0; // Just to fill out with 0. - VertexManager::s_pCurBufferPointer += 16; + DataWrite(0.f); + DataWrite(0.f); + DataWrite(float(s_curtexmtx[s_texmtxwrite++])); + // Just to fill out with 0. + DataWrite(0.f); } VertexLoader::VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr) @@ -274,15 +291,16 @@ void VertexLoader::CompileVertexTranslator() if (m_VtxDesc.Tex7MatIdx) {m_VertexSize += 1; m_NativeFmt->m_components |= VB_HAS_TEXMTXIDX7; WriteCall(TexMtx_ReadDirect_UByte); } // Write vertex position loader - WriteCall(VertexLoader_Position::GetFunction(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements)); + if(g_ActiveConfig.bUseBBox) { + WriteCall(UpdateBoundingBoxPrepare); + WriteCall(VertexLoader_Position::GetFunction(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements)); + WriteCall(UpdateBoundingBox); + } else { + WriteCall(VertexLoader_Position::GetFunction(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements)); + } m_VertexSize += VertexLoader_Position::GetSize(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements); nat_offset += 12; - // OK, so we just got a point. Let's go back and read it for the bounding box. - - if(g_ActiveConfig.bUseBBox) - WriteCall(UpdateBoundingBox); - // Normals vtx_decl.num_normals = 0; if (m_VtxDesc.Normal != NOT_PRESENT) @@ -507,7 +525,8 @@ void VertexLoader::WriteSetVariable(int bits, void *address, OpArg value) #endif } #endif -void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count) + +int VertexLoader::SetupRunVertices(int vtx_attr_group, int primitive, int const count) { m_numLoadedVertices += count; @@ -526,7 +545,7 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count) { // if cull mode is none, ignore triangles and quads DataSkip(count * m_VertexSize); - return; + return 0; } m_NativeFmt->EnableComponents(m_NativeFmt->m_components); @@ -550,157 +569,48 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count) for (int i = 0; i < 2; i++) colElements[i] = m_VtxAttr.color[i].Elements; - // if strips or fans, make sure all vertices can fit in buffer, otherwise flush - int granularity = 1; - switch (primitive) { - case 3: // strip .. hm, weird - case 4: // fan - if (VertexManager::GetRemainingSize() < 3 * native_stride) - VertexManager::Flush(); - break; - case 6: // line strip - if (VertexManager::GetRemainingSize() < 2 * native_stride) - VertexManager::Flush(); - break; - case 0: granularity = 4; break; // quads - case 2: granularity = 3; break; // tris - case 5: granularity = 2; break; // lines - } - - int startv = 0, extraverts = 0; - int v = 0; - - //int remainingVerts2 = VertexManager::GetRemainingVertices(primitive); - while (v < count) - { - int remainingVerts = VertexManager::GetRemainingSize() / native_stride; - //if (remainingVerts2 - v + startv < remainingVerts) - //remainingVerts = remainingVerts2 - v + startv; - if (remainingVerts < granularity) { - INCSTAT(stats.thisFrame.numBufferSplits); - // This buffer full - break current primitive and flush, to switch to the next buffer. - u8* plastptr = VertexManager::s_pCurBufferPointer; - if (v - startv > 0) - VertexManager::AddVertices(primitive, v - startv + extraverts); - VertexManager::Flush(); - //remainingVerts2 = VertexManager::GetRemainingVertices(primitive); - // Why does this need to be so complicated? - switch (primitive) { - case 3: // triangle strip, copy last two vertices - // a little trick since we have to keep track of signs - if (v & 1) { - memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-2*native_stride, native_stride); - memcpy_gc(VertexManager::s_pCurBufferPointer+native_stride, plastptr-native_stride*2, 2*native_stride); - VertexManager::s_pCurBufferPointer += native_stride*3; - extraverts = 3; - } - else { - memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride*2, native_stride*2); - VertexManager::s_pCurBufferPointer += native_stride*2; - extraverts = 2; - } - break; - case 4: // tri fan, copy first and last vert - memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride*(v-startv+extraverts), native_stride); - VertexManager::s_pCurBufferPointer += native_stride; - memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride, native_stride); - VertexManager::s_pCurBufferPointer += native_stride; - extraverts = 2; - break; - case 6: // line strip - memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride, native_stride); - VertexManager::s_pCurBufferPointer += native_stride; - extraverts = 1; - break; - default: - extraverts = 0; - break; - } - startv = v; - } - int remainingPrims = remainingVerts / granularity; - remainingVerts = remainingPrims * granularity; - if (count - v < remainingVerts) - remainingVerts = count - v; - - #ifdef USE_JIT - if (remainingVerts > 0) { - loop_counter = remainingVerts; - ((void (*)())(void*)m_compiledCode)(); - } - #else - for (int s = 0; s < remainingVerts; s++) - { - tcIndex = 0; - colIndex = 0; - s_texmtxwrite = s_texmtxread = 0; - for (int i = 0; i < m_numPipelineStages; i++) - m_PipelineStages[i](); - PRIM_LOG("\n"); - } - #endif - v += remainingVerts; - } - - if (startv < count) - VertexManager::AddVertices(primitive, count - startv + extraverts); + VertexManager::PrepareForAdditionalData(primitive, count, native_stride); + + return count; } - - - -void VertexLoader::RunCompiledVertices(int vtx_attr_group, int primitive, int count, u8* Data) +void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int const count) { - m_numLoadedVertices += count; - - // Flush if our vertex format is different from the currently set. - if (g_nativeVertexFmt != NULL && g_nativeVertexFmt != m_NativeFmt) - { - // We really must flush here. It's possible that the native representations - // of the two vtx formats are the same, but we have no way to easily check that - // now. - VertexManager::Flush(); - // Also move the Set() here? - } - g_nativeVertexFmt = m_NativeFmt; - - if (bpmem.genMode.cullmode == 3 && primitive < 5) - { - // if cull mode is none, ignore triangles and quads - DataSkip(count * m_VertexSize); - return; - } - - m_NativeFmt->EnableComponents(m_NativeFmt->m_components); - - // Load position and texcoord scale factors. - m_VtxAttr.PosFrac = g_VtxAttr[vtx_attr_group].g0.PosFrac; - m_VtxAttr.texCoord[0].Frac = g_VtxAttr[vtx_attr_group].g0.Tex0Frac; - m_VtxAttr.texCoord[1].Frac = g_VtxAttr[vtx_attr_group].g1.Tex1Frac; - m_VtxAttr.texCoord[2].Frac = g_VtxAttr[vtx_attr_group].g1.Tex2Frac; - m_VtxAttr.texCoord[3].Frac = g_VtxAttr[vtx_attr_group].g1.Tex3Frac; - m_VtxAttr.texCoord[4].Frac = g_VtxAttr[vtx_attr_group].g2.Tex4Frac; - m_VtxAttr.texCoord[5].Frac = g_VtxAttr[vtx_attr_group].g2.Tex5Frac; - m_VtxAttr.texCoord[6].Frac = g_VtxAttr[vtx_attr_group].g2.Tex6Frac; - m_VtxAttr.texCoord[7].Frac = g_VtxAttr[vtx_attr_group].g2.Tex7Frac; - - pVtxAttr = &m_VtxAttr; - posScale = fractionTable[m_VtxAttr.PosFrac]; - if (m_NativeFmt->m_components & VB_HAS_UVALL) - for (int i = 0; i < 8; i++) - tcScale[i] = fractionTable[m_VtxAttr.texCoord[i].Frac]; - for (int i = 0; i < 2; i++) - colElements[i] = m_VtxAttr.color[i].Elements; - - if(VertexManager::GetRemainingSize() < native_stride * count) - VertexManager::Flush(); - memcpy_gc(VertexManager::s_pCurBufferPointer, Data, native_stride * count); - VertexManager::s_pCurBufferPointer += native_stride * count; - DataSkip(count * m_VertexSize); - VertexManager::AddVertices(primitive, count); + auto const new_count = SetupRunVertices(vtx_attr_group, primitive, count); + ConvertVertices(new_count); + VertexManager::AddVertices(primitive, new_count); } +void VertexLoader::ConvertVertices ( int count ) +{ +#ifdef USE_JIT + if (count > 0) { + loop_counter = count; + ((void (*)())(void*)m_compiledCode)(); + } +#else + for (int s = 0; s < count; s++) + { + tcIndex = 0; + colIndex = 0; + s_texmtxwrite = s_texmtxread = 0; + for (int i = 0; i < m_numPipelineStages; i++) + m_PipelineStages[i](); + PRIM_LOG("\n"); + } +#endif +} +void VertexLoader::RunCompiledVertices(int vtx_attr_group, int primitive, int const count, u8* Data) +{ + auto const new_count = SetupRunVertices(vtx_attr_group, primitive, count); + + memcpy_gc(VertexManager::s_pCurBufferPointer, Data, native_stride * new_count); + VertexManager::s_pCurBufferPointer += native_stride * new_count; + DataSkip(new_count * m_VertexSize); + + VertexManager::AddVertices(primitive, new_count); +} void VertexLoader::SetVAT(u32 _group0, u32 _group1, u32 _group2) { diff --git a/Source/Core/VideoCommon/Src/VertexLoader.h b/Source/Core/VideoCommon/Src/VertexLoader.h index 0f321cd14f..c107b6b184 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader.h +++ b/Source/Core/VideoCommon/Src/VertexLoader.h @@ -88,6 +88,8 @@ public: ~VertexLoader(); int GetVertexSize() const {return m_VertexSize;} + + int SetupRunVertices(int vtx_attr_group, int primitive, int const count); void RunVertices(int vtx_attr_group, int primitive, int count); void RunCompiledVertices(int vtx_attr_group, int primitive, int count, u8* Data); @@ -124,6 +126,7 @@ private: void SetVAT(u32 _group0, u32 _group1, u32 _group2); void CompileVertexTranslator(); + void ConvertVertices(int count); void WriteCall(TPipelineFunction); diff --git a/Source/Core/VideoCommon/Src/VertexLoader_Color.cpp b/Source/Core/VideoCommon/Src/VertexLoader_Color.cpp index 9cfa5efc31..fa1ecbe973 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader_Color.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader_Color.cpp @@ -15,9 +15,6 @@ // Official SVN repository and contact information can be found at // http://code.google.com/p/dolphin-emu/ -#ifndef _VERTEXLOADERCOLOR_H -#define _VERTEXLOADERCOLOR_H - #include "Common.h" #include "VideoCommon.h" #include "LookUpTables.h" @@ -37,8 +34,7 @@ extern int colElements[2]; __forceinline void _SetCol(u32 val) { - *(u32*)VertexManager::s_pCurBufferPointer = val; - VertexManager::s_pCurBufferPointer += 4; + DataWrite(val); colIndex++; } @@ -132,80 +128,65 @@ void LOADERDECL Color_ReadDirect_32b_8888() _SetCol(col); } - - -void LOADERDECL Color_ReadIndex8_16b_565() +template +void Color_ReadIndex_16b_565() { - u8 Index = DataReadU8(); + auto const Index = DataRead(); u16 val = Common::swap16(*(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]))); _SetCol565(val); } -void LOADERDECL Color_ReadIndex8_24b_888() + +template +void Color_ReadIndex_24b_888() { - u8 Index = DataReadU8(); + auto const Index = DataRead(); const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]); _SetCol(_Read24(iAddress)); } -void LOADERDECL Color_ReadIndex8_32b_888x() + +template +void Color_ReadIndex_32b_888x() { - u8 Index = DataReadU8(); + auto const Index = DataRead(); const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]); _SetCol(_Read24(iAddress)); } -void LOADERDECL Color_ReadIndex8_16b_4444() + +template +void Color_ReadIndex_16b_4444() { - u8 Index = DataReadU8(); + auto const Index = DataRead(); u16 val = *(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex])); _SetCol4444(val); } -void LOADERDECL Color_ReadIndex8_24b_6666() + +template +void Color_ReadIndex_24b_6666() { - u8 Index = DataReadU8(); + auto const Index = DataRead(); const u8* pData = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]) - 1; u32 val = Common::swap32(pData); _SetCol6666(val); } -void LOADERDECL Color_ReadIndex8_32b_8888() + +template +void Color_ReadIndex_32b_8888() { - u8 Index = DataReadU8(); + auto const Index = DataRead(); const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]); _SetCol(_Read32(iAddress)); } -void LOADERDECL Color_ReadIndex16_16b_565() -{ - u16 Index = DataReadU16(); - u16 val = Common::swap16(*(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]))); - _SetCol565(val); -} -void LOADERDECL Color_ReadIndex16_24b_888() -{ - u16 Index = DataReadU16(); - const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]); - _SetCol(_Read24(iAddress)); -} -void LOADERDECL Color_ReadIndex16_32b_888x() -{ - u16 Index = DataReadU16(); - const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]); - _SetCol(_Read24(iAddress)); -} -void LOADERDECL Color_ReadIndex16_16b_4444() -{ - u16 Index = DataReadU16(); - u16 val = *(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex])); - _SetCol4444(val); -} -void LOADERDECL Color_ReadIndex16_24b_6666() -{ - u16 Index = DataReadU16(); - const u8 *pData = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]) - 1; - u32 val = Common::swap32(pData); - _SetCol6666(val); -} -void LOADERDECL Color_ReadIndex16_32b_8888() -{ - u16 Index = DataReadU16(); - const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]); - _SetCol(_Read32(iAddress)); -} -#endif + +void LOADERDECL Color_ReadIndex8_16b_565() { Color_ReadIndex_16b_565(); } +void LOADERDECL Color_ReadIndex8_24b_888() { Color_ReadIndex_24b_888(); } +void LOADERDECL Color_ReadIndex8_32b_888x() { Color_ReadIndex_32b_888x(); } +void LOADERDECL Color_ReadIndex8_16b_4444() { Color_ReadIndex_16b_4444(); } +void LOADERDECL Color_ReadIndex8_24b_6666() { Color_ReadIndex_24b_6666(); } +void LOADERDECL Color_ReadIndex8_32b_8888() { Color_ReadIndex_32b_8888(); } + +void LOADERDECL Color_ReadIndex16_16b_565() { Color_ReadIndex_16b_565(); } +void LOADERDECL Color_ReadIndex16_24b_888() { Color_ReadIndex_24b_888(); } +void LOADERDECL Color_ReadIndex16_32b_888x() { Color_ReadIndex_32b_888x(); } +void LOADERDECL Color_ReadIndex16_16b_4444() { Color_ReadIndex_16b_4444(); } +void LOADERDECL Color_ReadIndex16_24b_6666() { Color_ReadIndex_24b_6666(); } +void LOADERDECL Color_ReadIndex16_32b_8888() { Color_ReadIndex_32b_8888(); } diff --git a/Source/Core/VideoCommon/Src/VertexLoader_Normal.cpp b/Source/Core/VideoCommon/Src/VertexLoader_Normal.cpp index 830bd3de13..7e016828be 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader_Normal.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader_Normal.cpp @@ -22,6 +22,7 @@ #include "VertexManagerBase.h" #include "CPUDetect.h" #include +#include #if _M_SSE >= 0x401 #include @@ -30,78 +31,163 @@ #include #endif +// warning: mapping buffer should be disabled to use this #define LOG_NORM() // PRIM_LOG("norm: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-3], ((float*)VertexManager::s_pCurBufferPointer)[-2], ((float*)VertexManager::s_pCurBufferPointer)[-1]); VertexLoader_Normal::Set VertexLoader_Normal::m_Table[NUM_NRM_TYPE][NUM_NRM_INDICES][NUM_NRM_ELEMENTS][NUM_NRM_FORMAT]; +namespace +{ + +template +__forceinline float FracAdjust(T val) +{ + //auto const S8FRAC = 1.f / (1u << 6); + //auto const U8FRAC = 1.f / (1u << 7); + //auto const S16FRAC = 1.f / (1u << 14); + //auto const U16FRAC = 1.f / (1u << 15); + + // TODO: is this right? + return val / float(1u << (sizeof(T) * 8 - std::numeric_limits::is_signed - 1)); +} + +template <> +__forceinline float FracAdjust(float val) +{ return val; } + +template +__forceinline void ReadIndirect(const T* data) +{ + static_assert(3 == N || 9 == N, "N is only sane as 3 or 9!"); + + for (int i = 0; i != N; ++i) + { + DataWrite(FracAdjust(Common::FromBigEndian(data[i]))); + } + + LOG_NORM(); +} + +template +struct Normal_Direct +{ + static void LOADERDECL function() + { + auto const source = reinterpret_cast(DataGetPosition()); + ReadIndirect(source); + DataSkip(); + } + + static const int size = sizeof(T) * N * 3; +}; + +template +__forceinline void Normal_Index_Offset() +{ + static_assert(!std::numeric_limits::is_signed, "Only unsigned I is sane!"); + + auto const index = DataRead(); + auto const data = reinterpret_cast(cached_arraybases[ARRAY_NORMAL] + + (index * arraystrides[ARRAY_NORMAL]) + sizeof(T) * 3 * Offset); + ReadIndirect(data); +} + +template +struct Normal_Index +{ + static void LOADERDECL function() + { + Normal_Index_Offset(); + } + + static const int size = sizeof(I); +}; + +template +struct Normal_Index_Indices3 +{ + static void LOADERDECL function() + { + Normal_Index_Offset(); + Normal_Index_Offset(); + Normal_Index_Offset(); + } + + static const int size = sizeof(I) * 3; +}; + +} + void VertexLoader_Normal::Init(void) { - // HACK is for signed instead of unsigned to prevent crashes. - m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Set(3, Normal_DirectByte); //HACK - m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Set(3, Normal_DirectByte); - m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Set(6, Normal_DirectShort); //HACK - m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Set(6, Normal_DirectShort); - m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Set(12, Normal_DirectFloat); - m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Set(9, Normal_DirectByte3); //HACK - m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Set(9, Normal_DirectByte3); - m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Set(18, Normal_DirectShort3); //HACK - m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Set(18, Normal_DirectShort3); - m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Set(36, Normal_DirectFloat3); - - m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Set(3, Normal_DirectByte); //HACK - m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Set(3, Normal_DirectByte); - m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Set(6, Normal_DirectShort); //HACK - m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Set(6, Normal_DirectShort); - m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Set(12, Normal_DirectFloat); - m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Set(9, Normal_DirectByte3); //HACK - m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Set(9, Normal_DirectByte3); - m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Set(18, Normal_DirectShort3); //HACK - m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Set(18, Normal_DirectShort3); - m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Set(36, Normal_DirectFloat3); - - m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Set(1, Normal_Index8_Byte); //HACK - m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Set(1, Normal_Index8_Byte); - m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Set(1, Normal_Index8_Short); //HACK - m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Set(1, Normal_Index8_Short); - m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Set(1, Normal_Index8_Float); - m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Set(1, Normal_Index8_Byte3_Indices1); //HACK - m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Set(1, Normal_Index8_Byte3_Indices1); - m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Set(1, Normal_Index8_Short3_Indices1); //HACK - m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Set(1, Normal_Index8_Short3_Indices1); - m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Set(1, Normal_Index8_Float3_Indices1); - - m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Set(1, Normal_Index8_Byte); //HACK - m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Set(1, Normal_Index8_Byte); - m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Set(1, Normal_Index8_Short); //HACK - m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Set(1, Normal_Index8_Short); - m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Set(1, Normal_Index8_Float); - m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Set(3, Normal_Index8_Byte3_Indices3); //HACK - m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Set(3, Normal_Index8_Byte3_Indices3); - m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Set(3, Normal_Index8_Short3_Indices3); //HACK - m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Set(3, Normal_Index8_Short3_Indices3); - m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Set(3, Normal_Index8_Float3_Indices3); - - m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Set(2, Normal_Index16_Byte); //HACK - m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Set(2, Normal_Index16_Byte); - m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Set(2, Normal_Index16_Short); //HACK - m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Set(2, Normal_Index16_Short); - m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Set(2, Normal_Index16_Float); - m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Set(2, Normal_Index16_Byte3_Indices1); //HACK - m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Set(2, Normal_Index16_Byte3_Indices1); - m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Set(2, Normal_Index16_Short3_Indices1); //HACK - m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Set(2, Normal_Index16_Short3_Indices1); - m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Set(2, Normal_Index16_Float3_Indices1); - - m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Set(2, Normal_Index16_Byte); //HACK - m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Set(2, Normal_Index16_Byte); - m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Set(2, Normal_Index16_Short); //HACK - m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Set(2, Normal_Index16_Short); - m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Set(2, Normal_Index16_Float); - m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Set(6, Normal_Index16_Byte3_Indices3); //HACK - m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Set(6, Normal_Index16_Byte3_Indices3); - m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Set(6, Normal_Index16_Short3_Indices3); //HACK - m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Set(6, Normal_Index16_Short3_Indices3); - m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Set(6, Normal_Index16_Float3_Indices3); + m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Normal_Direct(); + m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Normal_Direct(); + m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Normal_Direct(); + m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Normal_Direct(); + m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Normal_Direct(); + m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Normal_Direct(); + m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Normal_Direct(); + m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Normal_Direct(); + m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Normal_Direct(); + m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Normal_Direct(); + + // Same as above + m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Normal_Direct(); + m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Normal_Direct(); + m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Normal_Direct(); + m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Normal_Direct(); + m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Normal_Direct(); + m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Normal_Direct(); + m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Normal_Direct(); + m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Direct(); + m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Direct(); + m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Direct(); + + m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Normal_Index(); + m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Normal_Index(); + m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Normal_Index(); + m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Normal_Index(); + m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Normal_Index(); + m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Normal_Index(); + m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Normal_Index(); + m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Normal_Index(); + m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Normal_Index(); + m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Normal_Index(); + + // Same as above for NRM_NBT + m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Normal_Index(); + m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Normal_Index(); + m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Normal_Index(); + m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Normal_Index(); + m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Normal_Index(); + m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Normal_Index_Indices3(); + m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Normal_Index_Indices3(); + m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Index_Indices3(); + m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Index_Indices3(); + m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Index_Indices3(); + + m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Normal_Index(); + m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Normal_Index(); + m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Normal_Index(); + m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Normal_Index(); + m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Normal_Index(); + m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Normal_Index(); + m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Normal_Index(); + m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Normal_Index(); + m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Normal_Index(); + m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Normal_Index(); + + // Same as above for NRM_NBT + m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Normal_Index(); + m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Normal_Index(); + m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Normal_Index(); + m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Normal_Index(); + m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Normal_Index(); + m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Normal_Index_Indices3(); + m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Normal_Index_Indices3(); + m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Index_Indices3(); + m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Index_Indices3(); + m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Index_Indices3(); } unsigned int VertexLoader_Normal::GetSize(unsigned int _type, @@ -116,312 +202,3 @@ TPipelineFunction VertexLoader_Normal::GetFunction(unsigned int _type, TPipelineFunction pFunc = m_Table[_type][_index3][_elements][_format].function; return pFunc; } - -// This fracs are fixed acording to format -#define S8FRAC 0.015625f; // 1.0f / (1U << 6) -#define S16FRAC 0.00006103515625f; // 1.0f / (1U << 14) -// --- Direct --- - -inline void ReadIndirectS8x3(const s8* pData) -{ - ((float*)VertexManager::s_pCurBufferPointer)[0] = pData[0] * S8FRAC; - ((float*)VertexManager::s_pCurBufferPointer)[1] = pData[1] * S8FRAC; - ((float*)VertexManager::s_pCurBufferPointer)[2] = pData[2] * S8FRAC; - VertexManager::s_pCurBufferPointer += 12; - LOG_NORM(); -} - -inline void ReadIndirectS8x9(const s8* pData) -{ - ((float*)VertexManager::s_pCurBufferPointer)[0] = pData[0] * S8FRAC; - ((float*)VertexManager::s_pCurBufferPointer)[1] = pData[1] * S8FRAC; - ((float*)VertexManager::s_pCurBufferPointer)[2] = pData[2] * S8FRAC; - LOG_NORM(); - ((float*)VertexManager::s_pCurBufferPointer)[3] = pData[3] * S8FRAC; - ((float*)VertexManager::s_pCurBufferPointer)[4] = pData[4] * S8FRAC; - ((float*)VertexManager::s_pCurBufferPointer)[5] = pData[5] * S8FRAC; - LOG_NORM(); - ((float*)VertexManager::s_pCurBufferPointer)[6] = pData[6] * S8FRAC; - ((float*)VertexManager::s_pCurBufferPointer)[7] = pData[7] * S8FRAC; - ((float*)VertexManager::s_pCurBufferPointer)[8] = pData[8] * S8FRAC; - LOG_NORM(); - VertexManager::s_pCurBufferPointer += 36; -} - -inline void ReadIndirectS16x3(const u16* pData) -{ - ((float*)VertexManager::s_pCurBufferPointer)[0] = ((s16)Common::swap16(pData[0])) * S16FRAC; - ((float*)VertexManager::s_pCurBufferPointer)[1] = ((s16)Common::swap16(pData[1])) * S16FRAC; - ((float*)VertexManager::s_pCurBufferPointer)[2] = ((s16)Common::swap16(pData[2])) * S16FRAC; - VertexManager::s_pCurBufferPointer += 12; - LOG_NORM() -} - -inline void ReadIndirectS16x9(const u16* pData) -{ - ((float*)VertexManager::s_pCurBufferPointer)[0] = ((s16)Common::swap16(pData[0])) * S16FRAC; - ((float*)VertexManager::s_pCurBufferPointer)[1] = ((s16)Common::swap16(pData[1])) * S16FRAC; - ((float*)VertexManager::s_pCurBufferPointer)[2] = ((s16)Common::swap16(pData[2])) * S16FRAC; - LOG_NORM() - ((float*)VertexManager::s_pCurBufferPointer)[3] = ((s16)Common::swap16(pData[3])) * S16FRAC; - ((float*)VertexManager::s_pCurBufferPointer)[4] = ((s16)Common::swap16(pData[4])) * S16FRAC; - ((float*)VertexManager::s_pCurBufferPointer)[5] = ((s16)Common::swap16(pData[5])) * S16FRAC; - LOG_NORM() - ((float*)VertexManager::s_pCurBufferPointer)[6] = ((s16)Common::swap16(pData[6])) * S16FRAC; - ((float*)VertexManager::s_pCurBufferPointer)[7] = ((s16)Common::swap16(pData[7])) * S16FRAC; - ((float*)VertexManager::s_pCurBufferPointer)[8] = ((s16)Common::swap16(pData[8])) * S16FRAC; - LOG_NORM() - VertexManager::s_pCurBufferPointer += 36; -} - -inline void ReadIndirectFloatx3(const u32* pData) -{ - ((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]); - ((u32*)VertexManager::s_pCurBufferPointer)[1] = Common::swap32(pData[1]); - ((u32*)VertexManager::s_pCurBufferPointer)[2] = Common::swap32(pData[2]); - VertexManager::s_pCurBufferPointer += 12; - LOG_NORM(); -} - -inline void ReadIndirectFloatx9(const u32* pData) -{ - ((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]); - ((u32*)VertexManager::s_pCurBufferPointer)[1] = Common::swap32(pData[1]); - ((u32*)VertexManager::s_pCurBufferPointer)[2] = Common::swap32(pData[2]); - LOG_NORM(); - ((u32*)VertexManager::s_pCurBufferPointer)[3] = Common::swap32(pData[3]); - ((u32*)VertexManager::s_pCurBufferPointer)[4] = Common::swap32(pData[4]); - ((u32*)VertexManager::s_pCurBufferPointer)[5] = Common::swap32(pData[5]); - LOG_NORM(); - ((u32*)VertexManager::s_pCurBufferPointer)[6] = Common::swap32(pData[6]); - ((u32*)VertexManager::s_pCurBufferPointer)[7] = Common::swap32(pData[7]); - ((u32*)VertexManager::s_pCurBufferPointer)[8] = Common::swap32(pData[8]); - LOG_NORM(); - VertexManager::s_pCurBufferPointer += 36; -} - -inline void ReadDirectS8x3() -{ - const s8* Source = (const s8*)DataGetPosition(); - ReadIndirectS8x3(Source); - DataSkip(3); -} - -inline void ReadDirectS8x9() -{ - const s8* Source = (const s8*)DataGetPosition(); - ReadIndirectS8x9(Source); - DataSkip(9); -} - -inline void ReadDirectS16x3() -{ - const u16* Source = (const u16*)DataGetPosition(); - ReadIndirectS16x3(Source); - DataSkip(6); -} - -inline void ReadDirectS16x9() -{ - const u16* Source = (const u16*)DataGetPosition(); - ReadIndirectS16x9(Source); - DataSkip(18); -} - -inline void ReadDirectFloatx3() -{ - const u32* Source = (const u32*)DataGetPosition(); - ReadIndirectFloatx3(Source); - DataSkip(12); -} - -inline void ReadDirectFloatx9() -{ - const u32* Source = (const u32*)DataGetPosition(); - ReadIndirectFloatx9(Source); - DataSkip(36); -} - - - -void LOADERDECL VertexLoader_Normal::Normal_DirectByte() -{ - ReadDirectS8x3(); -} - -void LOADERDECL VertexLoader_Normal::Normal_DirectShort() -{ - ReadDirectS16x3(); -} - -void LOADERDECL VertexLoader_Normal::Normal_DirectFloat() -{ - ReadDirectFloatx3(); -} - -void LOADERDECL VertexLoader_Normal::Normal_DirectByte3() -{ - ReadDirectS8x9(); -} - -void LOADERDECL VertexLoader_Normal::Normal_DirectShort3() -{ - ReadDirectS16x9(); -} - -void LOADERDECL VertexLoader_Normal::Normal_DirectFloat3() -{ - ReadDirectFloatx9(); -} - - -// --- Index8 --- - -void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte() -{ - u8 Index = DataReadU8(); - const s8* pData = (const s8 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL])); - ReadIndirectS8x3(pData); -} - -void LOADERDECL VertexLoader_Normal::Normal_Index8_Short() -{ - u8 Index = DataReadU8(); - const u16* pData = (const u16 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL])); - ReadIndirectS16x3(pData); -} - -void LOADERDECL VertexLoader_Normal::Normal_Index8_Float() -{ - u8 Index = DataReadU8(); - const u32* pData = (const u32 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL])); - ReadIndirectFloatx3(pData); -} - -void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte3_Indices1() -{ - u8 Index = DataReadU8(); - const s8* pData = (const s8 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL])); - ReadIndirectS8x9(pData); -} - -void LOADERDECL VertexLoader_Normal::Normal_Index8_Short3_Indices1() -{ - u8 Index = DataReadU8(); - const u16* pData = (const u16 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL])); - ReadIndirectS16x9(pData); -} - -void LOADERDECL VertexLoader_Normal::Normal_Index8_Float3_Indices1() -{ - u8 Index = DataReadU8(); - const u32* pData = (const u32 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL])); - ReadIndirectFloatx9(pData); -} - -void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte3_Indices3() -{ - for (int i = 0; i < 3; i++) - { - u8 Index = DataReadU8(); - const s8* pData = (const s8*)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 1*3*i); - ReadIndirectS8x3(pData); - } -} - - -void LOADERDECL VertexLoader_Normal::Normal_Index8_Short3_Indices3() -{ - for (int i = 0; i < 3; i++) - { - u8 Index = DataReadU8(); - const u16* pData = (const u16 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 2*3*i); - ReadIndirectS16x3(pData); - } -} - -void LOADERDECL VertexLoader_Normal::Normal_Index8_Float3_Indices3() -{ - for (int i = 0; i < 3; i++) - { - u8 Index = DataReadU8(); - const u32* pData = (const u32 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 4*3*i); - ReadIndirectFloatx3(pData); - } -} - - -// --- Index16 --- - - -void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte() -{ - u16 Index = DataReadU16(); - const s8* pData = (const s8 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL])); - ReadIndirectS8x3(pData); -} - -void LOADERDECL VertexLoader_Normal::Normal_Index16_Short() -{ - u16 Index = DataReadU16(); - const u16* pData = (const u16 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL])); - ReadIndirectS16x3(pData); -} - -void LOADERDECL VertexLoader_Normal::Normal_Index16_Float() -{ - u16 Index = DataReadU16(); - const u32* pData = (const u32 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL])); - ReadIndirectFloatx3(pData); -} - -void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte3_Indices1() -{ - u16 Index = DataReadU16(); - const s8* pData = (const s8 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL])); - ReadIndirectS8x9(pData); -} - -void LOADERDECL VertexLoader_Normal::Normal_Index16_Short3_Indices1() -{ - u16 Index = DataReadU16(); - const u16* pData = (const u16 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL])); - ReadIndirectS16x9(pData); -} - -void LOADERDECL VertexLoader_Normal::Normal_Index16_Float3_Indices1() -{ - u16 Index = DataReadU16(); - const u32* pData = (const u32 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL])); - ReadIndirectFloatx9(pData); -} - -void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte3_Indices3() -{ - for (int i = 0; i < 3; i++) - { - u16 Index = DataReadU16(); - const s8* pData = (const s8*)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 1*3*i); - ReadIndirectS8x3(pData); - } -} - -void LOADERDECL VertexLoader_Normal::Normal_Index16_Short3_Indices3() -{ - for (int i = 0; i < 3; i++) - { - u16 Index = DataReadU16(); - const u16* pData = (const u16 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 2*3*i); - ReadIndirectS16x3(pData); - } -} - -void LOADERDECL VertexLoader_Normal::Normal_Index16_Float3_Indices3() -{ - for (int i = 0; i < 3; i++) - { - u16 Index = DataReadU16(); - const u32* pData = (const u32 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 4*3*i); - ReadIndirectFloatx3(pData); - } -} diff --git a/Source/Core/VideoCommon/Src/VertexLoader_Normal.h b/Source/Core/VideoCommon/Src/VertexLoader_Normal.h index 934cd1ec43..d538b2a72e 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader_Normal.h +++ b/Source/Core/VideoCommon/Src/VertexLoader_Normal.h @@ -70,45 +70,20 @@ private: NUM_NRM_INDICES }; - struct Set { - Set() {} - Set(int gc_size_, TPipelineFunction function_) : gc_size(gc_size_), function(function_) {} + struct Set + { + template + void operator=(const T&) + { + gc_size = T::size; + function = T::function; + } + int gc_size; TPipelineFunction function; -// int pc_size; }; static Set m_Table[NUM_NRM_TYPE][NUM_NRM_INDICES][NUM_NRM_ELEMENTS][NUM_NRM_FORMAT]; - - // direct - static void LOADERDECL Normal_DirectByte(); - static void LOADERDECL Normal_DirectShort(); - static void LOADERDECL Normal_DirectFloat(); - static void LOADERDECL Normal_DirectByte3(); - static void LOADERDECL Normal_DirectShort3(); - static void LOADERDECL Normal_DirectFloat3(); - - // index8 - static void LOADERDECL Normal_Index8_Byte(); - static void LOADERDECL Normal_Index8_Short(); - static void LOADERDECL Normal_Index8_Float(); - static void LOADERDECL Normal_Index8_Byte3_Indices1(); - static void LOADERDECL Normal_Index8_Short3_Indices1(); - static void LOADERDECL Normal_Index8_Float3_Indices1(); - static void LOADERDECL Normal_Index8_Byte3_Indices3(); - static void LOADERDECL Normal_Index8_Short3_Indices3(); - static void LOADERDECL Normal_Index8_Float3_Indices3(); - - // index16 - static void LOADERDECL Normal_Index16_Byte(); - static void LOADERDECL Normal_Index16_Short(); - static void LOADERDECL Normal_Index16_Float(); - static void LOADERDECL Normal_Index16_Byte3_Indices1(); - static void LOADERDECL Normal_Index16_Short3_Indices1(); - static void LOADERDECL Normal_Index16_Float3_Indices1(); - static void LOADERDECL Normal_Index16_Byte3_Indices3(); - static void LOADERDECL Normal_Index16_Short3_Indices3(); - static void LOADERDECL Normal_Index16_Float3_Indices3(); }; #endif diff --git a/Source/Core/VideoCommon/Src/VertexLoader_Position.cpp b/Source/Core/VideoCommon/Src/VertexLoader_Position.cpp index 06481f9ddf..ce7a38b8c3 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader_Position.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader_Position.cpp @@ -15,6 +15,8 @@ // Official SVN repository and contact information can be found at // http://code.google.com/p/dolphin-emu/ +#include + #include "Common.h" #include "VideoCommon.h" #include "VertexLoader.h" @@ -71,101 +73,42 @@ MOVUPS(MOffset(EDI, 0), XMM0); */ -// ============================================================================== -// Direct -// ============================================================================== - -template -void Pos_ReadDirect() +template +float PosScale(T val) { - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(T)DataRead() * posScale; - ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(T)DataRead() * posScale; - if (three) - ((float*)VertexManager::s_pCurBufferPointer)[2] = (float)(T)DataRead() * posScale; - else - ((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f; + return val * posScale; +} + +template <> +float PosScale(float val) +{ return val; } + +template +void LOADERDECL Pos_ReadDirect() +{ + static_assert(N <= 3, "N > 3 is not sane!"); + + for (int i = 0; i < 3; ++i) + DataWrite(i()) : 0.f); + LOG_VTX(); - VertexManager::s_pCurBufferPointer += 12; } -void LOADERDECL Pos_ReadDirect_UByte3() { Pos_ReadDirect(); } -void LOADERDECL Pos_ReadDirect_Byte3() { Pos_ReadDirect(); } -void LOADERDECL Pos_ReadDirect_UShort3() { Pos_ReadDirect(); } -void LOADERDECL Pos_ReadDirect_Short3() { Pos_ReadDirect(); } -void LOADERDECL Pos_ReadDirect_UByte2() { Pos_ReadDirect(); } -void LOADERDECL Pos_ReadDirect_Byte2() { Pos_ReadDirect(); } -void LOADERDECL Pos_ReadDirect_UShort2() { Pos_ReadDirect(); } -void LOADERDECL Pos_ReadDirect_Short2() { Pos_ReadDirect(); } - -void LOADERDECL Pos_ReadDirect_Float3() +template +void LOADERDECL Pos_ReadIndex() { - // No need to use floating point here. - ((u32 *)VertexManager::s_pCurBufferPointer)[0] = DataReadU32(); - ((u32 *)VertexManager::s_pCurBufferPointer)[1] = DataReadU32(); - ((u32 *)VertexManager::s_pCurBufferPointer)[2] = DataReadU32(); - LOG_VTX(); - VertexManager::s_pCurBufferPointer += 12; -} - -void LOADERDECL Pos_ReadDirect_Float2() -{ - // No need to use floating point here. - ((u32 *)VertexManager::s_pCurBufferPointer)[0] = DataReadU32(); - ((u32 *)VertexManager::s_pCurBufferPointer)[1] = DataReadU32(); - ((u32 *)VertexManager::s_pCurBufferPointer)[2] = 0; - LOG_VTX(); - VertexManager::s_pCurBufferPointer += 12; -} - - -template -inline void Pos_ReadIndex_Byte(int Index) -{ - if(Index < MaxSize) + static_assert(!std::numeric_limits::is_signed, "Only unsigned I is sane!"); + static_assert(N <= 3, "N > 3 is not sane!"); + + auto const index = DataRead(); + if (index < std::numeric_limits::max()) { - const u8* pData = cached_arraybases[ARRAY_POSITION] + ((u32)Index * arraystrides[ARRAY_POSITION]); - ((float*)VertexManager::s_pCurBufferPointer)[0] = ((float)(T)(pData[0])) * posScale; - ((float*)VertexManager::s_pCurBufferPointer)[1] = ((float)(T)(pData[1])) * posScale; - if (three) - ((float*)VertexManager::s_pCurBufferPointer)[2] = ((float)(T)(pData[2])) * posScale; - else - ((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f; + auto const data = reinterpret_cast(cached_arraybases[ARRAY_POSITION] + (index * arraystrides[ARRAY_POSITION])); + + for (int i = 0; i < 3; ++i) + DataWrite(i -inline void Pos_ReadIndex_Short(int Index) -{ - if(Index < MaxSize) - { - const u16* pData = (const u16 *)(cached_arraybases[ARRAY_POSITION] + ((u32)Index * arraystrides[ARRAY_POSITION])); - ((float*)VertexManager::s_pCurBufferPointer)[0] = ((float)(T)Common::swap16(pData[0])) * posScale; - ((float*)VertexManager::s_pCurBufferPointer)[1] = ((float)(T)Common::swap16(pData[1])) * posScale; - if (three) - ((float*)VertexManager::s_pCurBufferPointer)[2] = ((float)(T)Common::swap16(pData[2])) * posScale; - else - ((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f; - LOG_VTX(); - VertexManager::s_pCurBufferPointer += 12; - } -} - -template -void Pos_ReadIndex_Float(int Index) -{ - if(Index < MaxSize) - { - const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (Index * arraystrides[ARRAY_POSITION])); - ((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]); - ((u32*)VertexManager::s_pCurBufferPointer)[1] = Common::swap32(pData[1]); - if (three) - ((u32*)VertexManager::s_pCurBufferPointer)[2] = Common::swap32(pData[2]); - else - ((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f; - LOG_VTX(); - VertexManager::s_pCurBufferPointer += 12; } } @@ -173,87 +116,22 @@ void Pos_ReadIndex_Float(int Index) static const __m128i kMaskSwap32_3 = _mm_set_epi32(0xFFFFFFFFL, 0x08090A0BL, 0x04050607L, 0x00010203L); static const __m128i kMaskSwap32_2 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x04050607L, 0x00010203L); -template -void Pos_ReadIndex_Float_SSSE3(int Index) +template +void LOADERDECL Pos_ReadIndex_Float_SSSE3() { - if(Index < MaxSize) + auto const index = DataRead(); + if (index < std::numeric_limits::max()) { - const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (Index * arraystrides[ARRAY_POSITION])); + const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (index * arraystrides[ARRAY_POSITION])); GC_ALIGNED128(const __m128i a = _mm_loadu_si128((__m128i*)pData)); GC_ALIGNED128(__m128i b = _mm_shuffle_epi8(a, three ? kMaskSwap32_3 : kMaskSwap32_2)); _mm_storeu_si128((__m128i*)VertexManager::s_pCurBufferPointer, b); + VertexManager::s_pCurBufferPointer += sizeof(float) * 3; LOG_VTX(); - VertexManager::s_pCurBufferPointer += 12; } } #endif -// Explicitly instantiate these functions to decrease the possibility of -// symbol binding problems when (only) calling them from JIT compiled code. -template void Pos_ReadDirect(); -template void Pos_ReadDirect(); -template void Pos_ReadDirect(); -template void Pos_ReadDirect(); -template void Pos_ReadDirect(); -template void Pos_ReadDirect(); -template void Pos_ReadDirect(); -template void Pos_ReadDirect(); -template void Pos_ReadIndex_Byte(int Index); -template void Pos_ReadIndex_Byte(int Index); -template void Pos_ReadIndex_Short(int Index); -template void Pos_ReadIndex_Short(int Index); -template void Pos_ReadIndex_Float(int Index); -template void Pos_ReadIndex_Byte(int Index); -template void Pos_ReadIndex_Byte(int Index); -template void Pos_ReadIndex_Short(int Index); -template void Pos_ReadIndex_Short(int Index); -template void Pos_ReadIndex_Float(int Index); -template void Pos_ReadIndex_Byte(int Index); -template void Pos_ReadIndex_Byte(int Index); -template void Pos_ReadIndex_Short(int Index); -template void Pos_ReadIndex_Short(int Index); -template void Pos_ReadIndex_Float(int Index); -template void Pos_ReadIndex_Byte(int Index); -template void Pos_ReadIndex_Byte(int Index); -template void Pos_ReadIndex_Short(int Index); -template void Pos_ReadIndex_Short(int Index); -template void Pos_ReadIndex_Float(int Index); - -// ============================================================================== -// Index 8 -// ============================================================================== -void LOADERDECL Pos_ReadIndex8_UByte3() {Pos_ReadIndex_Byte (DataReadU8());} -void LOADERDECL Pos_ReadIndex8_Byte3() {Pos_ReadIndex_Byte (DataReadU8());} -void LOADERDECL Pos_ReadIndex8_UShort3() {Pos_ReadIndex_Short (DataReadU8());} -void LOADERDECL Pos_ReadIndex8_Short3() {Pos_ReadIndex_Short (DataReadU8());} -void LOADERDECL Pos_ReadIndex8_Float3() {Pos_ReadIndex_Float (DataReadU8());} -void LOADERDECL Pos_ReadIndex8_UByte2() {Pos_ReadIndex_Byte(DataReadU8());} -void LOADERDECL Pos_ReadIndex8_Byte2() {Pos_ReadIndex_Byte(DataReadU8());} -void LOADERDECL Pos_ReadIndex8_UShort2() {Pos_ReadIndex_Short(DataReadU8());} -void LOADERDECL Pos_ReadIndex8_Short2() {Pos_ReadIndex_Short(DataReadU8());} -void LOADERDECL Pos_ReadIndex8_Float2() {Pos_ReadIndex_Float (DataReadU8());} - -// ============================================================================== -// Index 16 -// ============================================================================== -void LOADERDECL Pos_ReadIndex16_UByte3() {Pos_ReadIndex_Byte (DataReadU16());} -void LOADERDECL Pos_ReadIndex16_Byte3() {Pos_ReadIndex_Byte (DataReadU16());} -void LOADERDECL Pos_ReadIndex16_UShort3() {Pos_ReadIndex_Short (DataReadU16());} -void LOADERDECL Pos_ReadIndex16_Short3() {Pos_ReadIndex_Short (DataReadU16());} -void LOADERDECL Pos_ReadIndex16_Float3() {Pos_ReadIndex_Float (DataReadU16());} -void LOADERDECL Pos_ReadIndex16_UByte2() {Pos_ReadIndex_Byte(DataReadU16());} -void LOADERDECL Pos_ReadIndex16_Byte2() {Pos_ReadIndex_Byte(DataReadU16());} -void LOADERDECL Pos_ReadIndex16_UShort2() {Pos_ReadIndex_Short(DataReadU16());} -void LOADERDECL Pos_ReadIndex16_Short2() {Pos_ReadIndex_Short(DataReadU16());} -void LOADERDECL Pos_ReadIndex16_Float2() {Pos_ReadIndex_Float (DataReadU16());} - -#if _M_SSE >= 0x301 -void LOADERDECL Pos_ReadIndex8_Float3_SSSE3() {Pos_ReadIndex_Float_SSSE3 (DataReadU8());} -void LOADERDECL Pos_ReadIndex8_Float2_SSSE3() {Pos_ReadIndex_Float_SSSE3 (DataReadU8());} -void LOADERDECL Pos_ReadIndex16_Float3_SSSE3() {Pos_ReadIndex_Float_SSSE3 (DataReadU16());} -void LOADERDECL Pos_ReadIndex16_Float2_SSSE3() {Pos_ReadIndex_Float_SSSE3 (DataReadU16());} -#endif - static TPipelineFunction tableReadPosition[4][8][2] = { { {NULL, NULL,}, @@ -263,56 +141,40 @@ static TPipelineFunction tableReadPosition[4][8][2] = { {NULL, NULL,}, }, { - {Pos_ReadDirect_UByte2, Pos_ReadDirect_UByte3,}, - {Pos_ReadDirect_Byte2, Pos_ReadDirect_Byte3,}, - {Pos_ReadDirect_UShort2, Pos_ReadDirect_UShort3,}, - {Pos_ReadDirect_Short2, Pos_ReadDirect_Short3,}, - {Pos_ReadDirect_Float2, Pos_ReadDirect_Float3,}, + {Pos_ReadDirect, Pos_ReadDirect,}, + {Pos_ReadDirect, Pos_ReadDirect,}, + {Pos_ReadDirect, Pos_ReadDirect,}, + {Pos_ReadDirect, Pos_ReadDirect,}, + {Pos_ReadDirect, Pos_ReadDirect,}, }, { - {Pos_ReadIndex8_UByte2, Pos_ReadIndex8_UByte3,}, - {Pos_ReadIndex8_Byte2, Pos_ReadIndex8_Byte3,}, - {Pos_ReadIndex8_UShort2, Pos_ReadIndex8_UShort3,}, - {Pos_ReadIndex8_Short2, Pos_ReadIndex8_Short3,}, - {Pos_ReadIndex8_Float2, Pos_ReadIndex8_Float3,}, + {Pos_ReadIndex, Pos_ReadIndex,}, + {Pos_ReadIndex, Pos_ReadIndex,}, + {Pos_ReadIndex, Pos_ReadIndex,}, + {Pos_ReadIndex, Pos_ReadIndex,}, + {Pos_ReadIndex, Pos_ReadIndex,}, }, { - {Pos_ReadIndex16_UByte2, Pos_ReadIndex16_UByte3,}, - {Pos_ReadIndex16_Byte2, Pos_ReadIndex16_Byte3,}, - {Pos_ReadIndex16_UShort2, Pos_ReadIndex16_UShort3,}, - {Pos_ReadIndex16_Short2, Pos_ReadIndex16_Short3,}, - {Pos_ReadIndex16_Float2, Pos_ReadIndex16_Float3,}, + {Pos_ReadIndex, Pos_ReadIndex,}, + {Pos_ReadIndex, Pos_ReadIndex,}, + {Pos_ReadIndex, Pos_ReadIndex,}, + {Pos_ReadIndex, Pos_ReadIndex,}, + {Pos_ReadIndex, Pos_ReadIndex,}, }, }; static int tableReadPositionVertexSize[4][8][2] = { { - {0, 0,}, - {0, 0,}, - {0, 0,}, - {0, 0,}, - {0, 0,}, + {0, 0,}, {0, 0,}, {0, 0,}, {0, 0,}, {0, 0,}, }, { - {2, 3,}, - {2, 3,}, - {4, 6,}, - {4, 6,}, - {8, 12,}, + {2, 3,}, {2, 3,}, {4, 6,}, {4, 6,}, {8, 12,}, }, { - {1, 1,}, - {1, 1,}, - {1, 1,}, - {1, 1,}, - {1, 1,}, + {1, 1,}, {1, 1,}, {1, 1,}, {1, 1,}, {1, 1,}, }, { - {2, 2,}, - {2, 2,}, - {2, 2,}, - {2, 2,}, - {2, 2,}, + {2, 2,}, {2, 2,}, {2, 2,}, {2, 2,}, {2, 2,}, }, }; @@ -322,10 +184,10 @@ void VertexLoader_Position::Init(void) { #if _M_SSE >= 0x301 if (cpu_info.bSSSE3) { - tableReadPosition[2][4][0] = Pos_ReadIndex8_Float2_SSSE3; - tableReadPosition[2][4][1] = Pos_ReadIndex8_Float3_SSSE3; - tableReadPosition[3][4][0] = Pos_ReadIndex16_Float2_SSSE3; - tableReadPosition[3][4][1] = Pos_ReadIndex16_Float3_SSSE3; + tableReadPosition[2][4][0] = Pos_ReadIndex_Float_SSSE3; + tableReadPosition[2][4][1] = Pos_ReadIndex_Float_SSSE3; + tableReadPosition[3][4][0] = Pos_ReadIndex_Float_SSSE3; + tableReadPosition[3][4][1] = Pos_ReadIndex_Float_SSSE3; } #endif diff --git a/Source/Core/VideoCommon/Src/VertexLoader_TextCoord.cpp b/Source/Core/VideoCommon/Src/VertexLoader_TextCoord.cpp index ba3bb73f43..4be24640b3 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader_TextCoord.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader_TextCoord.cpp @@ -28,8 +28,22 @@ #include #endif -#define LOG_TEX1() // PRIM_LOG("tex: %f, ", ((float*)VertexManager::s_pCurBufferPointer)[0]); -#define LOG_TEX2() // PRIM_LOG("tex: %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[0], ((float*)VertexManager::s_pCurBufferPointer)[1]); +template +void LOG_TEX(); + +template <> +__forceinline void LOG_TEX<1>() +{ + // warning: mapping buffer should be disabled to use this + // PRIM_LOG("tex: %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-1]); +} + +template <> +__forceinline void LOG_TEX<2>() +{ + // warning: mapping buffer should be disabled to use this + // PRIM_LOG("tex: %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-2], ((float*)VertexManager::s_pCurBufferPointer)[-1]); +} extern int tcIndex; extern float tcScale[8]; @@ -39,279 +53,54 @@ void LOADERDECL TexCoord_Read_Dummy() tcIndex++; } -void LOADERDECL TexCoord_ReadDirect_UByte1() +template +float TCScale(T val) { - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)DataReadU8() * tcScale[tcIndex]; - LOG_TEX1(); - VertexManager::s_pCurBufferPointer += 4; - tcIndex++; -} -void LOADERDECL TexCoord_ReadDirect_UByte2() -{ - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)DataReadU8() * tcScale[tcIndex]; - ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)DataReadU8() * tcScale[tcIndex]; - LOG_TEX2(); - VertexManager::s_pCurBufferPointer += 8; - tcIndex++; + return val * tcScale[tcIndex]; } -void LOADERDECL TexCoord_ReadDirect_Byte1() +template <> +float TCScale(float val) +{ return val; } + +template +void LOADERDECL TexCoord_ReadDirect() { - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)DataReadU8() * tcScale[tcIndex]; - LOG_TEX1(); - VertexManager::s_pCurBufferPointer += 4; - tcIndex++; -} -void LOADERDECL TexCoord_ReadDirect_Byte2() -{ - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)DataReadU8() * tcScale[tcIndex]; - ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s8)DataReadU8() * tcScale[tcIndex]; - LOG_TEX2(); - VertexManager::s_pCurBufferPointer += 8; - tcIndex++; + for (int i = 0; i != N; ++i) + DataWrite(TCScale(DataRead())); + + LOG_TEX(); + + ++tcIndex; } -void LOADERDECL TexCoord_ReadDirect_UShort1() +template +void LOADERDECL TexCoord_ReadIndex() { - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)DataReadU16() * tcScale[tcIndex]; - LOG_TEX1(); - VertexManager::s_pCurBufferPointer += 4; - tcIndex++; -} -void LOADERDECL TexCoord_ReadDirect_UShort2() -{ - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)DataReadU16() * tcScale[tcIndex]; - ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)DataReadU16() * tcScale[tcIndex]; - LOG_TEX2(); - VertexManager::s_pCurBufferPointer += 8; - tcIndex++; -} - -void LOADERDECL TexCoord_ReadDirect_Short1() -{ - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)DataReadU16() * tcScale[tcIndex]; - LOG_TEX1(); - VertexManager::s_pCurBufferPointer += 4; - tcIndex++; -} -void LOADERDECL TexCoord_ReadDirect_Short2() -{ - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)DataReadU16() * tcScale[tcIndex]; - ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s16)DataReadU16() * tcScale[tcIndex]; - LOG_TEX2(); - VertexManager::s_pCurBufferPointer += 8; - tcIndex++; -} - -void LOADERDECL TexCoord_ReadDirect_Float1() -{ - ((u32*)VertexManager::s_pCurBufferPointer)[0] = DataReadU32(); - LOG_TEX1(); - VertexManager::s_pCurBufferPointer += 4; - tcIndex++; -} -void LOADERDECL TexCoord_ReadDirect_Float2() -{ - ((u32*)VertexManager::s_pCurBufferPointer)[0] = DataReadU32(); - ((u32*)VertexManager::s_pCurBufferPointer)[1] = DataReadU32(); - LOG_TEX2(); - VertexManager::s_pCurBufferPointer += 8; - tcIndex++; -} - -// ================================================================================== -void LOADERDECL TexCoord_ReadIndex8_UByte1() -{ - u8 Index = DataReadU8(); - const u8 *pData = cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]); - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(*pData) * tcScale[tcIndex]; - LOG_TEX1(); - VertexManager::s_pCurBufferPointer += 4; - tcIndex++; -} -void LOADERDECL TexCoord_ReadIndex8_UByte2() -{ - u8 Index = DataReadU8(); - const u8 *pData = cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]); - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(u8)(pData[0]) * tcScale[tcIndex]; - ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(u8)(pData[1]) * tcScale[tcIndex]; - LOG_TEX2(); - VertexManager::s_pCurBufferPointer += 8; - tcIndex++; -} - -void LOADERDECL TexCoord_ReadIndex8_Byte1() -{ - u8 Index = DataReadU8(); - const u8 *pData = cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]); - - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)(*pData) * tcScale[tcIndex]; - LOG_TEX1(); - VertexManager::s_pCurBufferPointer += 4; - tcIndex++; -} -void LOADERDECL TexCoord_ReadIndex8_Byte2() -{ - u8 Index = DataReadU8(); - const u8 *pData = cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]); - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)(pData[0]) * tcScale[tcIndex]; - ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s8)(pData[1]) * tcScale[tcIndex]; - LOG_TEX2(); - VertexManager::s_pCurBufferPointer += 8; - tcIndex++; -} - -void LOADERDECL TexCoord_ReadIndex8_UShort1() -{ - u8 Index = DataReadU8(); - const u16 *pData = (const u16 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex])); - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(u16)Common::swap16(*pData) * tcScale[tcIndex]; - LOG_TEX1(); - VertexManager::s_pCurBufferPointer += 4; - tcIndex++; -} -void LOADERDECL TexCoord_ReadIndex8_UShort2() -{ - u8 Index = DataReadU8(); - const u16 *pData = (const u16 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex])); - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(u16)Common::swap16(pData[0]) * tcScale[tcIndex]; - ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(u16)Common::swap16(pData[1]) * tcScale[tcIndex]; - LOG_TEX2(); - VertexManager::s_pCurBufferPointer += 8; - tcIndex++; -} - -void LOADERDECL TexCoord_ReadIndex8_Short1() -{ - u8 Index = DataReadU8(); - const u16 *pData = (const u16 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex])); - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)Common::swap16(pData[0]) * tcScale[tcIndex]; - LOG_TEX1(); - VertexManager::s_pCurBufferPointer += 4; - tcIndex++; -} -void LOADERDECL TexCoord_ReadIndex8_Short2() -{ - u8 Index = DataReadU8(); - const u16 *pData = (const u16 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex])); - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)Common::swap16(pData[0]) * tcScale[tcIndex]; - ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s16)Common::swap16(pData[1]) * tcScale[tcIndex]; - LOG_TEX2(); - VertexManager::s_pCurBufferPointer += 8; - tcIndex++; -} - -void LOADERDECL TexCoord_ReadIndex8_Float1() -{ - u16 Index = DataReadU8(); - const u32 *pData = (const u32 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex])); - ((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]); - LOG_TEX1(); - VertexManager::s_pCurBufferPointer += 4; - tcIndex++; -} -void LOADERDECL TexCoord_ReadIndex8_Float2() -{ - u16 Index = DataReadU8(); - const u32 *pData = (const u32 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex])); - ((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]); - ((u32*)VertexManager::s_pCurBufferPointer)[1] = Common::swap32(pData[1]); - LOG_TEX2(); - VertexManager::s_pCurBufferPointer += 8; - tcIndex++; -} - -// ================================================================================== -void LOADERDECL TexCoord_ReadIndex16_UByte1() -{ - u16 Index = DataReadU16(); - const u8 *pData = cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]); - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(u8)(pData[0]) * tcScale[tcIndex]; - LOG_TEX1(); - VertexManager::s_pCurBufferPointer += 4; - tcIndex++; -} -void LOADERDECL TexCoord_ReadIndex16_UByte2() -{ - u16 Index = DataReadU16(); - const u8 *pData = cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]); - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(u8)(pData[0]) * tcScale[tcIndex]; - ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(u8)(pData[1]) * tcScale[tcIndex]; - LOG_TEX2(); - VertexManager::s_pCurBufferPointer += 8; - tcIndex++; -} - -void LOADERDECL TexCoord_ReadIndex16_Byte1() -{ - u16 Index = DataReadU16(); - const u8 *pData = cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]); - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)(pData[0]) * tcScale[tcIndex]; - LOG_TEX1(); - VertexManager::s_pCurBufferPointer += 4; - tcIndex++; -} -void LOADERDECL TexCoord_ReadIndex16_Byte2() -{ - u16 Index = DataReadU16(); - const u8 *pData = cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]); - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)(pData[0]) * tcScale[tcIndex]; - ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s8)(pData[1]) * tcScale[tcIndex]; - LOG_TEX2(); - VertexManager::s_pCurBufferPointer += 8; - tcIndex++; -} - -void LOADERDECL TexCoord_ReadIndex16_UShort1() -{ - u16 Index = DataReadU16(); - const u16* pData = (const u16 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex])); - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(u16)Common::swap16(pData[0]) * tcScale[tcIndex]; - LOG_TEX1(); - VertexManager::s_pCurBufferPointer += 4; - tcIndex++; -} -void LOADERDECL TexCoord_ReadIndex16_UShort2() -{ - u16 Index = DataReadU16(); - const u16* pData = (const u16 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex])); - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(u16)Common::swap16(pData[0]) * tcScale[tcIndex]; - ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(u16)Common::swap16(pData[1]) * tcScale[tcIndex]; - LOG_TEX2(); - VertexManager::s_pCurBufferPointer += 8; - tcIndex++; -} - -void LOADERDECL TexCoord_ReadIndex16_Short1() -{ - u16 Index = DataReadU16(); - const u16 *pData = (const u16 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex])); - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)Common::swap16(*pData) * tcScale[tcIndex]; - LOG_TEX1(); - VertexManager::s_pCurBufferPointer += 4; - tcIndex++; -} - -void LOADERDECL TexCoord_ReadIndex16_Short2() -{ - // Heavy in ZWW - u16 Index = DataReadU16(); - const u16 *pData = (const u16 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex])); - ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)Common::swap16(pData[0]) * tcScale[tcIndex]; - ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s16)Common::swap16(pData[1]) * tcScale[tcIndex]; - LOG_TEX2(); - VertexManager::s_pCurBufferPointer += 8; - tcIndex++; + static_assert(!std::numeric_limits::is_signed, "Only unsigned I is sane!"); + + auto const index = DataRead(); + auto const data = reinterpret_cast(cached_arraybases[ARRAY_TEXCOORD0 + tcIndex] + + (index * arraystrides[ARRAY_TEXCOORD0 + tcIndex])); + + for (int i = 0; i != N; ++i) + DataWrite(TCScale(Common::FromBigEndian(data[i]))); + + LOG_TEX(); + ++tcIndex; } #if _M_SSE >= 0x401 static const __m128i kMaskSwap16_2 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFFFFL, 0x02030001L); -void LOADERDECL TexCoord_ReadIndex16_Short2_SSE4() +template +void LOADERDECL TexCoord_ReadIndex_Short2_SSE4() { + static_assert(!std::numeric_limits::is_signed, "Only unsigned I is sane!"); + // Heavy in ZWW - u16 Index = DataReadU16(); - const s32 *pData = (const s32*)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex])); + auto const index = DataRead(); + const s32 *pData = (const s32*)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (index * arraystrides[ARRAY_TEXCOORD0+tcIndex])); const __m128i a = _mm_cvtsi32_si128(*pData); const __m128i b = _mm_shuffle_epi8(a, kMaskSwap16_2); const __m128i c = _mm_cvtepi16_epi32(b); @@ -319,47 +108,27 @@ void LOADERDECL TexCoord_ReadIndex16_Short2_SSE4() const __m128 e = _mm_load1_ps(&tcScale[tcIndex]); const __m128 f = _mm_mul_ps(d, e); _mm_storeu_ps((float*)VertexManager::s_pCurBufferPointer, f); - LOG_TEX2(); - VertexManager::s_pCurBufferPointer += 8; + VertexManager::s_pCurBufferPointer += sizeof(float) * 2; + LOG_TEX<2>(); tcIndex++; } #endif -void LOADERDECL TexCoord_ReadIndex16_Float1() -{ - u16 Index = DataReadU16(); - const u32 *pData = (const u32 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex])); - ((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]); - LOG_TEX1(); - VertexManager::s_pCurBufferPointer += 4; - tcIndex++; -} - -void LOADERDECL TexCoord_ReadIndex16_Float2() -{ - u16 Index = DataReadU16(); - const u32 *pData = (const u32 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex])); - ((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]); - ((u32*)VertexManager::s_pCurBufferPointer)[1] = Common::swap32(pData[1]); - LOG_TEX2(); - VertexManager::s_pCurBufferPointer += 8; - tcIndex++; -} - #if _M_SSE >= 0x301 static const __m128i kMaskSwap32 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x04050607L, 0x00010203L); -void LOADERDECL TexCoord_ReadIndex16_Float2_SSSE3() +template +void LOADERDECL TexCoord_ReadIndex_Float2_SSSE3() { - u16 Index = DataReadU16(); - const u32 *pData = (const u32 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex])); + static_assert(!std::numeric_limits::is_signed, "Only unsigned I is sane!"); + + auto const index = DataRead(); + const u32 *pData = (const u32 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (index * arraystrides[ARRAY_TEXCOORD0+tcIndex])); GC_ALIGNED128(const __m128i a = _mm_loadl_epi64((__m128i*)pData)); GC_ALIGNED128(const __m128i b = _mm_shuffle_epi8(a, kMaskSwap32)); - u8* p = VertexManager::s_pCurBufferPointer; - _mm_storel_epi64((__m128i*)p, b); - LOG_TEX2(); - p += 8; - VertexManager::s_pCurBufferPointer = p; + _mm_storel_epi64((__m128i*)VertexManager::s_pCurBufferPointer, b); + VertexManager::s_pCurBufferPointer += sizeof(float) * 2; + LOG_TEX<2>(); tcIndex++; } #endif @@ -373,56 +142,40 @@ static TPipelineFunction tableReadTexCoord[4][8][2] = { {NULL, NULL,}, }, { - {TexCoord_ReadDirect_UByte1, TexCoord_ReadDirect_UByte2,}, - {TexCoord_ReadDirect_Byte1, TexCoord_ReadDirect_Byte2,}, - {TexCoord_ReadDirect_UShort1, TexCoord_ReadDirect_UShort2,}, - {TexCoord_ReadDirect_Short1, TexCoord_ReadDirect_Short2,}, - {TexCoord_ReadDirect_Float1, TexCoord_ReadDirect_Float2,}, + {TexCoord_ReadDirect, TexCoord_ReadDirect,}, + {TexCoord_ReadDirect, TexCoord_ReadDirect,}, + {TexCoord_ReadDirect, TexCoord_ReadDirect,}, + {TexCoord_ReadDirect, TexCoord_ReadDirect,}, + {TexCoord_ReadDirect, TexCoord_ReadDirect,}, }, { - {TexCoord_ReadIndex8_UByte1, TexCoord_ReadIndex8_UByte2,}, - {TexCoord_ReadIndex8_Byte1, TexCoord_ReadIndex8_Byte2,}, - {TexCoord_ReadIndex8_UShort1, TexCoord_ReadIndex8_UShort2,}, - {TexCoord_ReadIndex8_Short1, TexCoord_ReadIndex8_Short2,}, - {TexCoord_ReadIndex8_Float1, TexCoord_ReadIndex8_Float2,}, + {TexCoord_ReadIndex, TexCoord_ReadIndex,}, + {TexCoord_ReadIndex, TexCoord_ReadIndex,}, + {TexCoord_ReadIndex, TexCoord_ReadIndex,}, + {TexCoord_ReadIndex, TexCoord_ReadIndex,}, + {TexCoord_ReadIndex, TexCoord_ReadIndex,}, }, { - {TexCoord_ReadIndex16_UByte1, TexCoord_ReadIndex16_UByte2,}, - {TexCoord_ReadIndex16_Byte1, TexCoord_ReadIndex16_Byte2,}, - {TexCoord_ReadIndex16_UShort1, TexCoord_ReadIndex16_UShort2,}, - {TexCoord_ReadIndex16_Short1, TexCoord_ReadIndex16_Short2,}, - {TexCoord_ReadIndex16_Float1, TexCoord_ReadIndex16_Float2,}, + {TexCoord_ReadIndex, TexCoord_ReadIndex,}, + {TexCoord_ReadIndex, TexCoord_ReadIndex,}, + {TexCoord_ReadIndex, TexCoord_ReadIndex,}, + {TexCoord_ReadIndex, TexCoord_ReadIndex,}, + {TexCoord_ReadIndex, TexCoord_ReadIndex,}, }, }; static int tableReadTexCoordVertexSize[4][8][2] = { { - {0, 0,}, - {0, 0,}, - {0, 0,}, - {0, 0,}, - {0, 0,}, + {0, 0,}, {0, 0,}, {0, 0,}, {0, 0,}, {0, 0,}, }, { - {1, 2,}, - {1, 2,}, - {2, 4,}, - {2, 4,}, - {4, 8,}, + {1, 2,}, {1, 2,}, {2, 4,}, {2, 4,}, {4, 8,}, }, { - {1, 1,}, - {1, 1,}, - {1, 1,}, - {1, 1,}, - {1, 1,}, + {1, 1,}, {1, 1,}, {1, 1,}, {1, 1,}, {1, 1,}, }, { - {2, 2,}, - {2, 2,}, - {2, 2,}, - {2, 2,}, - {2, 2,}, + {2, 2,}, {2, 2,}, {2, 2,}, {2, 2,}, {2, 2,}, }, }; @@ -430,16 +183,20 @@ void VertexLoader_TextCoord::Init(void) { #if _M_SSE >= 0x301 - if (cpu_info.bSSSE3) { - tableReadTexCoord[3][4][1] = TexCoord_ReadIndex16_Float2_SSSE3; + if (cpu_info.bSSSE3) + { + tableReadTexCoord[2][4][1] = TexCoord_ReadIndex_Float2_SSSE3; + tableReadTexCoord[3][4][1] = TexCoord_ReadIndex_Float2_SSSE3; } #endif #if _M_SSE >= 0x401 - if (cpu_info.bSSE4_1) { - tableReadTexCoord[3][3][1] = TexCoord_ReadIndex16_Short2_SSE4; + if (cpu_info.bSSE4_1) + { + tableReadTexCoord[2][3][1] = TexCoord_ReadIndex_Short2_SSE4; + tableReadTexCoord[3][3][1] = TexCoord_ReadIndex_Short2_SSE4; } #endif diff --git a/Source/Core/VideoCommon/Src/VertexManagerBase.cpp b/Source/Core/VideoCommon/Src/VertexManagerBase.cpp index ceffcda618..54830bcbea 100644 --- a/Source/Core/VideoCommon/Src/VertexManagerBase.cpp +++ b/Source/Core/VideoCommon/Src/VertexManagerBase.cpp @@ -12,171 +12,120 @@ #include "BPStructs.h" #include "VertexManagerBase.h" +#include "MainBase.h" #include "VideoConfig.h" VertexManager *g_vertex_manager; u8 *VertexManager::s_pCurBufferPointer; u8 *VertexManager::s_pBaseBufferPointer; - -u8 *VertexManager::LocalVBuffer; -u16 *VertexManager::TIBuffer; -u16 *VertexManager::LIBuffer; -u16 *VertexManager::PIBuffer; - -bool VertexManager::Flushed; +u8 *VertexManager::s_pEndBufferPointer; VertexManager::VertexManager() { - Flushed = false; + LocalVBuffer.resize(MAXVBUFFERSIZE); + s_pCurBufferPointer = s_pBaseBufferPointer = &LocalVBuffer[0]; + s_pEndBufferPointer = s_pBaseBufferPointer + LocalVBuffer.size(); - LocalVBuffer = new u8[MAXVBUFFERSIZE]; - s_pCurBufferPointer = s_pBaseBufferPointer = LocalVBuffer; + TIBuffer.resize(MAXIBUFFERSIZE); + LIBuffer.resize(MAXIBUFFERSIZE); + PIBuffer.resize(MAXIBUFFERSIZE); - TIBuffer = new u16[MAXIBUFFERSIZE]; - LIBuffer = new u16[MAXIBUFFERSIZE]; - PIBuffer = new u16[MAXIBUFFERSIZE]; - - IndexGenerator::Start(TIBuffer, LIBuffer, PIBuffer); -} - -void VertexManager::ResetBuffer() -{ - s_pCurBufferPointer = LocalVBuffer; -} - -VertexManager::~VertexManager() -{ - delete[] LocalVBuffer; - - delete[] TIBuffer; - delete[] LIBuffer; - delete[] PIBuffer; - - // TODO: necessary?? ResetBuffer(); } -void VertexManager::AddIndices(int primitive, int numVertices) -{ - //switch (primitive) - //{ - //case GX_DRAW_QUADS: IndexGenerator::AddQuads(numVertices); break; - //case GX_DRAW_TRIANGLES: IndexGenerator::AddList(numVertices); break; - //case GX_DRAW_TRIANGLE_STRIP: IndexGenerator::AddStrip(numVertices); break; - //case GX_DRAW_TRIANGLE_FAN: IndexGenerator::AddFan(numVertices); break; - //case GX_DRAW_LINES: IndexGenerator::AddLineList(numVertices); break; - //case GX_DRAW_LINE_STRIP: IndexGenerator::AddLineStrip(numVertices); break; - //case GX_DRAW_POINTS: IndexGenerator::AddPoints(numVertices); break; - //} +VertexManager::~VertexManager() +{} - static void (*const primitive_table[])(int) = +void VertexManager::ResetBuffer() +{ + s_pCurBufferPointer = s_pBaseBufferPointer; + IndexGenerator::Start(GetTriangleIndexBuffer(), GetLineIndexBuffer(), GetPointIndexBuffer()); +} + +u32 VertexManager::GetRemainingSize() +{ + return (u32)(s_pEndBufferPointer - s_pCurBufferPointer); +} + +void VertexManager::PrepareForAdditionalData(int primitive, u32 count, u32 stride) +{ + u32 const needed_vertex_bytes = count * stride; + + if (needed_vertex_bytes > GetRemainingSize() || count > GetRemainingIndices(primitive)) { - IndexGenerator::AddQuads, - NULL, - IndexGenerator::AddList, - IndexGenerator::AddStrip, - IndexGenerator::AddFan, - IndexGenerator::AddLineList, - IndexGenerator::AddLineStrip, - IndexGenerator::AddPoints, - }; - - primitive_table[primitive](numVertices); + Flush(); + + if (needed_vertex_bytes > GetRemainingSize()) + ERROR_LOG(VIDEO, "VertexManager: Buffer not large enough for all vertices! " + "Increase MAXVBUFFERSIZE or we need primitive breaking afterall."); + if (count > GetRemainingIndices(primitive)) + ERROR_LOG(VIDEO, "VertexManager: Buffer not large enough for all indices! " + "Increase MAXIBUFFERSIZE or we need primitive breaking afterall."); + } } -int VertexManager::GetRemainingSize() +bool VertexManager::IsFlushed() const { - return MAXVBUFFERSIZE - (int)(s_pCurBufferPointer - LocalVBuffer); + return s_pBaseBufferPointer == s_pCurBufferPointer; } -int VertexManager::GetRemainingVertices(int primitive) +u32 VertexManager::GetRemainingIndices(int primitive) { switch (primitive) { case GX_DRAW_QUADS: + return (MAXIBUFFERSIZE - IndexGenerator::GetTriangleindexLen()) / 6 * 4; case GX_DRAW_TRIANGLES: + return (MAXIBUFFERSIZE - IndexGenerator::GetTriangleindexLen()); case GX_DRAW_TRIANGLE_STRIP: + return (MAXIBUFFERSIZE - IndexGenerator::GetTriangleindexLen()) / 3 + 2; case GX_DRAW_TRIANGLE_FAN: - return (MAXIBUFFERSIZE - IndexGenerator::GetTriangleindexLen()) / 3; - break; + return (MAXIBUFFERSIZE - IndexGenerator::GetTriangleindexLen()) / 3 + 2; case GX_DRAW_LINES: + return (MAXIBUFFERSIZE - IndexGenerator::GetLineindexLen()); case GX_DRAW_LINE_STRIP: - return (MAXIBUFFERSIZE - IndexGenerator::GetLineindexLen()) / 2; - break; + return (MAXIBUFFERSIZE - IndexGenerator::GetLineindexLen()) / 2 + 1; case GX_DRAW_POINTS: return (MAXIBUFFERSIZE - IndexGenerator::GetPointindexLen()); - break; default: return 0; - break; } } -void VertexManager::AddVertices(int primitive, int numVertices) +void VertexManager::AddVertices(int primitive, u32 numVertices) { if (numVertices <= 0) return; - switch (primitive) - { - case GX_DRAW_QUADS: - case GX_DRAW_TRIANGLES: - case GX_DRAW_TRIANGLE_STRIP: - case GX_DRAW_TRIANGLE_FAN: - if (MAXIBUFFERSIZE - IndexGenerator::GetTriangleindexLen() < 3 * numVertices) - Flush(); - break; - - case GX_DRAW_LINES: - case GX_DRAW_LINE_STRIP: - if (MAXIBUFFERSIZE - IndexGenerator::GetLineindexLen() < 2 * numVertices) - Flush(); - break; - - case GX_DRAW_POINTS: - if (MAXIBUFFERSIZE - IndexGenerator::GetPointindexLen() < numVertices) - Flush(); - break; - - default: - return; - break; - } - - if (Flushed) - { - IndexGenerator::Start(TIBuffer, LIBuffer, PIBuffer); - Flushed = false; - } - ADDSTAT(stats.thisFrame.numPrims, numVertices); INCSTAT(stats.thisFrame.numPrimitiveJoins); - AddIndices(primitive, numVertices); + + IndexGenerator::AddIndices(primitive, numVertices); } void VertexManager::Flush() { + if (g_vertex_manager->IsFlushed()) + return; + // loading a state will invalidate BP, so check for it g_video_backend->CheckInvalidState(); + VideoFifo_CheckEFBAccess(); + g_vertex_manager->vFlush(); + + g_vertex_manager->ResetBuffer(); } // TODO: need to merge more stuff into VideoCommon to use this #if (0) void VertexManager::Flush() { - if (LocalVBuffer == s_pCurBufferPointer || Flushed) - return; - - Flushed = true; - - VideoFifo_CheckEFBAccess(); - #if defined(_DEBUG) || defined(DEBUGFAST) PRIM_LOG("frame%d:\n texgen=%d, numchan=%d, dualtex=%d, ztex=%d, cole=%d, alpe=%d, ze=%d", g_ActiveConfig.iSaveTargetId, xfregs.numTexGens, xfregs.nNumChans, (int)xfregs.bEnableDualTexTransform, bpmem.ztex2.op, @@ -249,9 +198,9 @@ void VertexManager::Flush() // finally bind if (false == PixelShaderCache::SetShader(false, g_nativeVertexFmt->m_components)) - goto shader_fail; + return; if (false == VertexShaderCache::SetShader(g_nativeVertexFmt->m_components)) - goto shader_fail; + return; const int stride = g_nativeVertexFmt->GetVertexStride(); //if (g_nativeVertexFmt) @@ -265,7 +214,7 @@ void VertexManager::Flush() if (false == g_ActiveConfig.bDstAlphaPass && bpmem.dstalpha.enable && bpmem.blendmode.alphaupdate) { if (false == PixelShaderCache::SetShader(true, g_nativeVertexFmt->m_components)) - goto shader_fail; + return; g_vertex_manager->Draw(stride, true); } @@ -301,9 +250,6 @@ void VertexManager::Flush() } #endif ++g_Config.iSaveTargetId; - -shader_fail: - ResetBuffer(); } #endif @@ -314,12 +260,16 @@ void VertexManager::DoState(PointerWrap& p) void VertexManager::DoStateShared(PointerWrap& p) { - p.DoPointer(s_pCurBufferPointer, LocalVBuffer); - p.DoArray(LocalVBuffer, MAXVBUFFERSIZE); - p.DoArray(TIBuffer, MAXIBUFFERSIZE); - p.DoArray(LIBuffer, MAXIBUFFERSIZE); - p.DoArray(PIBuffer, MAXIBUFFERSIZE); - - if (p.GetMode() == PointerWrap::MODE_READ) - Flushed = false; + // It seems we half-assume to be flushed here + // We update s_pCurBufferPointer yet don't worry about IndexGenerator's outdated pointers + // and maybe other things are overlooked + + p.Do(LocalVBuffer); + p.Do(TIBuffer); + p.Do(LIBuffer); + p.Do(PIBuffer); + + s_pBaseBufferPointer = &LocalVBuffer[0]; + s_pEndBufferPointer = s_pBaseBufferPointer + LocalVBuffer.size(); + p.DoPointer(s_pCurBufferPointer, s_pBaseBufferPointer); } diff --git a/Source/Core/VideoCommon/Src/VertexManagerBase.h b/Source/Core/VideoCommon/Src/VertexManagerBase.h index f3a4aa72e3..2690ffc03e 100644 --- a/Source/Core/VideoCommon/Src/VertexManagerBase.h +++ b/Source/Core/VideoCommon/Src/VertexManagerBase.h @@ -2,72 +2,70 @@ #ifndef _VERTEXMANAGERBASE_H #define _VERTEXMANAGERBASE_H +#include + class NativeVertexFormat; class PointerWrap; class VertexManager { +private: + // What are the actual values? + static const u32 SMALLEST_POSSIBLE_VERTEX = 1; + static const u32 LARGEST_POSSIBLE_VERTEX = 188; + + static const u32 MAX_PRIMITIVES_PER_COMMAND = (u16)-1; + public: - - enum - { - // values from OGL backend - //MAXVBUFFERSIZE = 0x1FFFF, - //MAXIBUFFERSIZE = 0xFFFF, - - // values from DX9 backend - //MAXVBUFFERSIZE = 0x50000, - //MAXIBUFFERSIZE = 0xFFFF, - - // values from DX11 backend - MAXVBUFFERSIZE = 0x50000, - MAXIBUFFERSIZE = 0xFFFF, - }; + static const u32 MAXVBUFFERSIZE = MAX_PRIMITIVES_PER_COMMAND * LARGEST_POSSIBLE_VERTEX; + + // We may convert triangle-fans to triangle-lists, almost 3x as many indices. + static const u32 MAXIBUFFERSIZE = MAX_PRIMITIVES_PER_COMMAND * 3; VertexManager(); - virtual ~VertexManager(); // needs to be virtual for DX11's dtor + // needs to be virtual for DX11's dtor + virtual ~VertexManager(); - static void AddVertices(int _primitive, int _numVertices); + static void AddVertices(int _primitive, u32 _numVertices); - // TODO: protected? static u8 *s_pCurBufferPointer; static u8 *s_pBaseBufferPointer; + static u8 *s_pEndBufferPointer; - static int GetRemainingSize(); - static int GetRemainingVertices(int primitive); + static u32 GetRemainingSize(); + static void PrepareForAdditionalData(int primitive, u32 count, u32 stride); + static u32 GetRemainingIndices(int primitive); static void Flush(); virtual ::NativeVertexFormat* CreateNativeVertexFormat() = 0; - static u16* GetTriangleIndexBuffer() { return TIBuffer; } - static u16* GetLineIndexBuffer() { return LIBuffer; } - static u16* GetPointIndexBuffer() { return PIBuffer; } - static u8* GetVertexBuffer() { return LocalVBuffer; } - static void DoState(PointerWrap& p); virtual void CreateDeviceObjects(){}; virtual void DestroyDeviceObjects(){}; + protected: - // TODO: make private after Flush() is merged - static void ResetBuffer(); - - static u8 *LocalVBuffer; - static u16 *TIBuffer; - static u16 *LIBuffer; - static u16 *PIBuffer; - - static bool Flushed; + u16* GetTriangleIndexBuffer() { return &TIBuffer[0]; } + u16* GetLineIndexBuffer() { return &LIBuffer[0]; } + u16* GetPointIndexBuffer() { return &PIBuffer[0]; } + u8* GetVertexBuffer() { return &s_pBaseBufferPointer[0]; } virtual void vDoState(PointerWrap& p) { DoStateShared(p); } void DoStateShared(PointerWrap& p); private: - static void AddIndices(int primitive, int numVertices); + bool IsFlushed() const; + + void ResetBuffer(); + //virtual void Draw(u32 stride, bool alphapass) = 0; // temp virtual void vFlush() = 0; - + + std::vector LocalVBuffer; + std::vector TIBuffer; + std::vector LIBuffer; + std::vector PIBuffer; }; extern VertexManager *g_vertex_manager; diff --git a/Source/Core/VideoCommon/Src/VideoCommon.h b/Source/Core/VideoCommon/Src/VideoCommon.h index 1eeed20b82..b15d539611 100644 --- a/Source/Core/VideoCommon/Src/VideoCommon.h +++ b/Source/Core/VideoCommon/Src/VideoCommon.h @@ -90,8 +90,8 @@ struct TargetRectangle : public MathUtil::Rectangle #define PRIM_LOG(...) DEBUG_LOG(VIDEO, ##__VA_ARGS__) #endif - -// #define LOG_VTX() DEBUG_LOG(VIDEO, "vtx: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[0], ((float*)VertexManager::s_pCurBufferPointer)[1], ((float*)VertexManager::s_pCurBufferPointer)[2]); +// warning: mapping buffer should be disabled to use this +// #define LOG_VTX() DEBUG_LOG(VIDEO, "vtx: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-3], ((float*)VertexManager::s_pCurBufferPointer)[-2], ((float*)VertexManager::s_pCurBufferPointer)[-1]); #define LOG_VTX() diff --git a/Source/Core/VideoCommon/Src/x64DLCache.cpp b/Source/Core/VideoCommon/Src/x64DLCache.cpp index 559593a08c..6033981759 100644 --- a/Source/Core/VideoCommon/Src/x64DLCache.cpp +++ b/Source/Core/VideoCommon/Src/x64DLCache.cpp @@ -550,8 +550,7 @@ void CompileAndRunDisplayList(u32 address, u32 size, CachedDisplayList *dl) cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7) (cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT, numVertices); - u8* EndAddress = VertexManager::s_pCurBufferPointer; - u32 Vdatasize = (u32)(EndAddress - StartAddress); + u32 Vdatasize = (u32)(VertexManager::s_pCurBufferPointer - StartAddress); if (Vdatasize > 0) { // Compile diff --git a/Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp b/Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp index 6991b11690..8966cb131d 100644 --- a/Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp +++ b/Source/Plugins/Plugin_VideoDX11/Src/VertexManager.cpp @@ -104,7 +104,7 @@ void VertexManager::LoadBuffers() { D3D11_MAPPED_SUBRESOURCE map; - UINT vSize = UINT(s_pCurBufferPointer - LocalVBuffer); + UINT vSize = UINT(s_pCurBufferPointer - s_pBaseBufferPointer); D3D11_MAP MapType = D3D11_MAP_WRITE_NO_OVERWRITE; if (m_vertexBufferCursor + vSize >= VBUFFER_SIZE) { @@ -116,7 +116,7 @@ void VertexManager::LoadBuffers() D3D::context->Map(m_vertexBuffers[m_activeVertexBuffer], 0, MapType, 0, &map); - memcpy((u8*)map.pData + m_vertexBufferCursor, LocalVBuffer, vSize); + memcpy((u8*)map.pData + m_vertexBufferCursor, s_pBaseBufferPointer, vSize); D3D::context->Unmap(m_vertexBuffers[m_activeVertexBuffer], 0); m_vertexDrawOffset = m_vertexBufferCursor; m_vertexBufferCursor += vSize; @@ -136,9 +136,9 @@ void VertexManager::LoadBuffers() m_triangleDrawIndex = m_indexBufferCursor; m_lineDrawIndex = m_triangleDrawIndex + IndexGenerator::GetTriangleindexLen(); m_pointDrawIndex = m_lineDrawIndex + IndexGenerator::GetLineindexLen(); - memcpy((u16*)map.pData + m_triangleDrawIndex, TIBuffer, sizeof(u16) * IndexGenerator::GetTriangleindexLen()); - memcpy((u16*)map.pData + m_lineDrawIndex, LIBuffer, sizeof(u16) * IndexGenerator::GetLineindexLen()); - memcpy((u16*)map.pData + m_pointDrawIndex, PIBuffer, sizeof(u16) * IndexGenerator::GetPointindexLen()); + memcpy((u16*)map.pData + m_triangleDrawIndex, GetTriangleIndexBuffer(), sizeof(u16) * IndexGenerator::GetTriangleindexLen()); + memcpy((u16*)map.pData + m_lineDrawIndex, GetLineIndexBuffer(), sizeof(u16) * IndexGenerator::GetLineindexLen()); + memcpy((u16*)map.pData + m_pointDrawIndex, GetPointIndexBuffer(), sizeof(u16) * IndexGenerator::GetPointindexLen()); D3D::context->Unmap(m_indexBuffers[m_activeIndexBuffer], 0); m_indexBufferCursor += iCount; } @@ -208,13 +208,9 @@ void VertexManager::Draw(UINT stride) if (IndexGenerator::GetNumLines() > 0 || IndexGenerator::GetNumPoints() > 0) ((DX11::Renderer*)g_renderer)->RestoreCull(); } + void VertexManager::vFlush() { - if (LocalVBuffer == s_pCurBufferPointer) return; - if (Flushed) return; - Flushed=true; - VideoFifo_CheckEFBAccess(); - u32 usedtextures = 0; for (u32 i = 0; i < (u32)bpmem.genMode.numtevstages + 1; ++i) if (bpmem.tevorders[i / 2].getEnable(i & 1)) @@ -262,12 +258,12 @@ void VertexManager::vFlush() g_nativeVertexFmt->m_components)) { GFX_DEBUGGER_PAUSE_LOG_AT(NEXT_ERROR,true,{printf("Fail to set pixel shader\n");}); - goto shader_fail; + return; } if (!VertexShaderCache::SetShader(g_nativeVertexFmt->m_components)) { GFX_DEBUGGER_PAUSE_LOG_AT(NEXT_ERROR,true,{printf("Fail to set pixel shader\n");}); - goto shader_fail; + return; } LoadBuffers(); unsigned int stride = g_nativeVertexFmt->GetVertexStride(); @@ -281,9 +277,6 @@ void VertexManager::vFlush() GFX_DEBUGGER_PAUSE_AT(NEXT_FLUSH, true); g_renderer->RestoreState(); - -shader_fail: - ResetBuffer(); } } // namespace diff --git a/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp b/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp index 661cf36e76..7e1f4dfae2 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/VertexManager.cpp @@ -173,7 +173,7 @@ void VertexManager::PrepareVBuffers(int stride) DestroyDeviceObjects(); return; } - memcpy(pVertices, LocalVBuffer, datasize); + memcpy(pVertices, s_pBaseBufferPointer, datasize); VBuffers[CurrentVBuffer]->Unlock(); LockMode = D3DLOCK_NOOVERWRITE; @@ -192,17 +192,17 @@ void VertexManager::PrepareVBuffers(int stride) } if(TdataSize) { - memcpy(pIndices, TIBuffer, TdataSize * sizeof(u16)); + memcpy(pIndices, GetTriangleIndexBuffer(), TdataSize * sizeof(u16)); pIndices += TdataSize; } if(LDataSize) { - memcpy(pIndices, LIBuffer, LDataSize * sizeof(u16)); + memcpy(pIndices, GetLineIndexBuffer(), LDataSize * sizeof(u16)); pIndices += LDataSize; } if(PDataSize) { - memcpy(pIndices, PIBuffer, PDataSize * sizeof(u16)); + memcpy(pIndices, GetPointIndexBuffer(), PDataSize * sizeof(u16)); } IBuffers[CurrentIBuffer]->Unlock(); D3D::dev->SetStreamSource( 0, VBuffers[CurrentVBuffer], CurrentVBufferIndex, stride); @@ -266,9 +266,9 @@ void VertexManager::DrawVA(int stride) if (FAILED(D3D::dev->DrawIndexedPrimitiveUP( D3DPT_TRIANGLELIST, 0, IndexGenerator::GetNumVerts(), IndexGenerator::GetNumTriangles(), - TIBuffer, + GetTriangleIndexBuffer(), D3DFMT_INDEX16, - LocalVBuffer, + s_pBaseBufferPointer, stride))) { DumpBadShaders(); @@ -280,9 +280,9 @@ void VertexManager::DrawVA(int stride) if (FAILED(D3D::dev->DrawIndexedPrimitiveUP( D3DPT_LINELIST, 0, IndexGenerator::GetNumVerts(), IndexGenerator::GetNumLines(), - LIBuffer, + GetLineIndexBuffer(), D3DFMT_INDEX16, - LocalVBuffer, + s_pBaseBufferPointer, stride))) { DumpBadShaders(); @@ -294,9 +294,9 @@ void VertexManager::DrawVA(int stride) if (FAILED(D3D::dev->DrawIndexedPrimitiveUP( D3DPT_POINTLIST, 0, IndexGenerator::GetNumVerts(), IndexGenerator::GetNumPoints(), - PIBuffer, + GetPointIndexBuffer(), D3DFMT_INDEX16, - LocalVBuffer, + s_pBaseBufferPointer, stride))) { DumpBadShaders(); @@ -307,11 +307,6 @@ void VertexManager::DrawVA(int stride) void VertexManager::vFlush() { - if (LocalVBuffer == s_pCurBufferPointer) return; - if (Flushed) return; - Flushed = true; - VideoFifo_CheckEFBAccess(); - u32 usedtextures = 0; for (u32 i = 0; i < (u32)bpmem.genMode.numtevstages + 1; ++i) if (bpmem.tevorders[i / 2].getEnable(i & 1)) @@ -388,7 +383,6 @@ shader_fail: CurrentIBufferIndex += IndexGenerator::GetTriangleindexLen() + IndexGenerator::GetLineindexLen() + IndexGenerator::GetPointindexLen(); CurrentVBufferIndex += IndexGenerator::GetNumVerts() * stride; } - ResetBuffer(); } } diff --git a/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp b/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp index 570ca7cc61..74f1efbb8a 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/VertexManager.cpp @@ -84,27 +84,23 @@ void VertexManager::Draw() { if (IndexGenerator::GetNumTriangles() > 0) { - glDrawElements(GL_TRIANGLES, IndexGenerator::GetTriangleindexLen(), GL_UNSIGNED_SHORT, TIBuffer); + glDrawElements(GL_TRIANGLES, IndexGenerator::GetTriangleindexLen(), GL_UNSIGNED_SHORT, GetTriangleIndexBuffer()); INCSTAT(stats.thisFrame.numIndexedDrawCalls); } if (IndexGenerator::GetNumLines() > 0) { - glDrawElements(GL_LINES, IndexGenerator::GetLineindexLen(), GL_UNSIGNED_SHORT, LIBuffer); + glDrawElements(GL_LINES, IndexGenerator::GetLineindexLen(), GL_UNSIGNED_SHORT, GetLineIndexBuffer()); INCSTAT(stats.thisFrame.numIndexedDrawCalls); } if (IndexGenerator::GetNumPoints() > 0) { - glDrawElements(GL_POINTS, IndexGenerator::GetPointindexLen(), GL_UNSIGNED_SHORT, PIBuffer); + glDrawElements(GL_POINTS, IndexGenerator::GetPointindexLen(), GL_UNSIGNED_SHORT, GetPointIndexBuffer()); INCSTAT(stats.thisFrame.numIndexedDrawCalls); } } void VertexManager::vFlush() { - if (LocalVBuffer == s_pCurBufferPointer) return; - if (Flushed) return; - Flushed=true; - VideoFifo_CheckEFBAccess(); #if defined(_DEBUG) || defined(DEBUGFAST) PRIM_LOG("frame%d:\n texgen=%d, numchan=%d, dualtex=%d, ztex=%d, cole=%d, alpe=%d, ze=%d", g_ActiveConfig.iSaveTargetId, xfregs.numTexGen.numTexGens, xfregs.numChan.numColorChans, xfregs.dualTexTrans.enabled, bpmem.ztex2.op, @@ -136,7 +132,7 @@ void VertexManager::vFlush() (void)GL_REPORT_ERROR(); //glBindBuffer(GL_ARRAY_BUFFER, s_vboBuffers[s_nCurVBOIndex]); - //glBufferData(GL_ARRAY_BUFFER, s_pCurBufferPointer - LocalVBuffer, LocalVBuffer, GL_STREAM_DRAW); + //glBufferData(GL_ARRAY_BUFFER, s_pCurBufferPointer - s_pBaseBufferPointer, s_pBaseBufferPointer, GL_STREAM_DRAW); GL_REPORT_ERRORD(); // setup the pointers @@ -244,8 +240,6 @@ void VertexManager::vFlush() GFX_DEBUGGER_PAUSE_AT(NEXT_FLUSH, true); //s_nCurVBOIndex = (s_nCurVBOIndex + 1) % ARRAYSIZE(s_vboBuffers); - s_pCurBufferPointer = LocalVBuffer; - IndexGenerator::Start(TIBuffer,LIBuffer,PIBuffer); #if defined(_DEBUG) || defined(DEBUGFAST) if (g_ActiveConfig.iLog & CONF_SAVESHADERS)