Merge branch 'vertex-loader-cleanup'

This commit is contained in:
degasus 2013-03-06 14:08:02 +01:00
commit 8d5299c20b
18 changed files with 770 additions and 1732 deletions

View File

@ -172,6 +172,41 @@ inline u64 swap64(u64 data) {return ((u64)swap32(data) << 32) | swap32(data >> 3
inline u16 swap16(const u8* _pData) {return swap16(*(const u16*)_pData);} inline u16 swap16(const u8* _pData) {return swap16(*(const u16*)_pData);}
inline u32 swap32(const u8* _pData) {return swap32(*(const u32*)_pData);} inline u32 swap32(const u8* _pData) {return swap32(*(const u32*)_pData);}
inline u64 swap64(const u8* _pData) {return swap64(*(const u64*)_pData);} inline u64 swap64(const u8* _pData) {return swap64(*(const u64*)_pData);}
// In-place byte-order reversal of the `count`-byte value stored at the given
// address. This is only the primary template declaration; the explicit
// specializations for 1, 2, 4 and 8 bytes that follow provide the definitions,
// and no definition is visible here for any other size.
template <int count>
void swap(u8*);
// One-byte specialization: a single byte has no byte order, so there is
// nothing to do.
template <>
inline void swap<1>(u8*)
{
}
// Two-byte specialization: reverses the byte order of the 16-bit value stored
// at `data`, writing the result back to the same location.
template <>
inline void swap<2>(u8* data)
{
	u16* const value = reinterpret_cast<u16*>(data);
	*value = swap16(*value);
}
// Four-byte specialization: reverses the byte order of the 32-bit value stored
// at `data`, writing the result back to the same location.
template <>
inline void swap<4>(u8* data)
{
	u32* const value = reinterpret_cast<u32*>(data);
	*value = swap32(*value);
}
// Eight-byte specialization: reverses the byte order of the 64-bit value stored
// at `data`, writing the result back to the same location.
template <>
inline void swap<8>(u8* data)
{
	u64* const value = reinterpret_cast<u64*>(data);
	*value = swap64(*value);
}
template <typename T>
inline T FromBigEndian(T data)
{
//static_assert(std::is_arithmetic<T>::value, "function only makes sense with arithmetic types");
swap<sizeof(data)>(reinterpret_cast<u8*>(&data));
return data;
}
} // Namespace Common } // Namespace Common
#endif // _COMMONFUNCS_H_ #endif // _COMMONFUNCS_H_

View File

@ -20,6 +20,8 @@
#ifndef _DATAREADER_H #ifndef _DATAREADER_H
#define _DATAREADER_H #define _DATAREADER_H
#include "VertexManagerBase.h"
extern u8* g_pVideoData; extern u8* g_pVideoData;
#if _M_SSE >= 0x301 && !(defined __GNUC__ && !defined __SSSE3__) #if _M_SSE >= 0x301 && !(defined __GNUC__ && !defined __SSSE3__)
@ -31,43 +33,63 @@ __forceinline void DataSkip(u32 skip)
g_pVideoData += skip; g_pVideoData += skip;
} }
// Advances the global video data read pointer by a compile-time constant
// number of bytes.
// (Author's note: probably unnecessary next to the runtime DataSkip(u32).)
template <int count>
__forceinline void DataSkip()
{
	g_pVideoData = g_pVideoData + count;
}
// Reads a value of type T located `_uOffset` bytes past the current video data
// read pointer and returns it byte-swapped via Common::FromBigEndian.
// The read pointer itself is not moved.
template <typename T>
__forceinline T DataPeek(int _uOffset)
{
	T* const source = reinterpret_cast<T*>(g_pVideoData + _uOffset);
	return Common::FromBigEndian(*source);
}
// TODO: kill these
__forceinline u8 DataPeek8(int _uOffset) __forceinline u8 DataPeek8(int _uOffset)
{ {
return g_pVideoData[_uOffset]; return DataPeek<u8>(_uOffset);
} }
__forceinline u16 DataPeek16(int _uOffset) __forceinline u16 DataPeek16(int _uOffset)
{ {
return Common::swap16(*(u16*)&g_pVideoData[_uOffset]); return DataPeek<u16>(_uOffset);
} }
__forceinline u32 DataPeek32(int _uOffset) __forceinline u32 DataPeek32(int _uOffset)
{ {
return Common::swap32(*(u32*)&g_pVideoData[_uOffset]); return DataPeek<u32>(_uOffset);
} }
// Reads a byte-swapped value of type T at the current video data read pointer
// (see DataPeek) and then advances the pointer past it.
template <typename T>
__forceinline T DataRead()
{
	T const value = DataPeek<T>(0);
	// Consume the bytes that were just peeked.
	g_pVideoData += sizeof(T);
	return value;
}
// TODO: kill these
__forceinline u8 DataReadU8() __forceinline u8 DataReadU8()
{ {
return *g_pVideoData++; return DataRead<u8>();
} }
__forceinline s8 DataReadS8() __forceinline s8 DataReadS8()
{ {
return (s8)(*g_pVideoData++); return DataRead<s8>();
} }
__forceinline u16 DataReadU16() __forceinline u16 DataReadU16()
{ {
u16 tmp = Common::swap16(*(u16*)g_pVideoData); return DataRead<u16>();
g_pVideoData += 2;
return tmp;
} }
__forceinline u32 DataReadU32() __forceinline u32 DataReadU32()
{ {
u32 tmp = Common::swap32(*(u32*)g_pVideoData); return DataRead<u32>();
g_pVideoData += 4;
return tmp;
} }
typedef void (*DataReadU32xNfunc)(u32 *buf); typedef void (*DataReadU32xNfunc)(u32 *buf);
@ -120,58 +142,16 @@ __forceinline u32 DataReadU32Unswapped()
return tmp; return tmp;
} }
template<class T>
__forceinline T DataRead()
{
T tmp = *(T*)g_pVideoData;
g_pVideoData += sizeof(T);
return tmp;
}
template <>
__forceinline u16 DataRead()
{
u16 tmp = Common::swap16(*(u16*)g_pVideoData);
g_pVideoData += 2;
return tmp;
}
template <>
__forceinline s16 DataRead()
{
s16 tmp = (s16)Common::swap16(*(u16*)g_pVideoData);
g_pVideoData += 2;
return tmp;
}
template <>
__forceinline u32 DataRead()
{
u32 tmp = (u32)Common::swap32(*(u32*)g_pVideoData);
g_pVideoData += 4;
return tmp;
}
template <>
__forceinline s32 DataRead()
{
s32 tmp = (s32)Common::swap32(*(u32*)g_pVideoData);
g_pVideoData += 4;
return tmp;
}
__forceinline float DataReadF32()
{
union {u32 i; float f;} temp;
temp.i = Common::swap32(*(u32*)g_pVideoData);
g_pVideoData += 4;
float tmp = temp.f;
return tmp;
}
__forceinline u8* DataGetPosition() __forceinline u8* DataGetPosition()
{ {
return g_pVideoData; return g_pVideoData;
} }
// Stores `data` at the vertex manager's current buffer write position and
// advances that position by sizeof(T). No byte swapping is performed.
template <typename T>
__forceinline void DataWrite(T data)
{
	T* const destination = reinterpret_cast<T*>(VertexManager::s_pCurBufferPointer);
	*destination = data;
	VertexManager::s_pCurBufferPointer += sizeof(T);
}
#endif #endif

View File

@ -15,6 +15,9 @@
// Official SVN repository and contact information can be found at // Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/ // http://code.google.com/p/dolphin-emu/
#include <cstddef>
#include "Common.h"
#include "IndexGenerator.h" #include "IndexGenerator.h"
/* /*
@ -27,24 +30,18 @@ QUAD simulator
*/ */
//Init //Init
u16 *IndexGenerator::Tptr = 0; u16 *IndexGenerator::Tptr;
u16 *IndexGenerator::BASETptr = 0; u16 *IndexGenerator::BASETptr;
u16 *IndexGenerator::Lptr = 0; u16 *IndexGenerator::Lptr;
u16 *IndexGenerator::BASELptr = 0; u16 *IndexGenerator::BASELptr;
u16 *IndexGenerator::Pptr = 0; u16 *IndexGenerator::Pptr;
u16 *IndexGenerator::BASEPptr = 0; u16 *IndexGenerator::BASEPptr;
int IndexGenerator::numT = 0; u32 IndexGenerator::numT;
int IndexGenerator::numL = 0; u32 IndexGenerator::numL;
int IndexGenerator::numP = 0; u32 IndexGenerator::numP;
int IndexGenerator::index = 0; u32 IndexGenerator::index;
int IndexGenerator::Tadds = 0;
int IndexGenerator::Ladds = 0;
int IndexGenerator::Padds = 0;
IndexGenerator::IndexPrimitiveType IndexGenerator::LastTPrimitive = Prim_None;
IndexGenerator::IndexPrimitiveType IndexGenerator::LastLPrimitive = Prim_None;
bool IndexGenerator::used = false;
void IndexGenerator::Start(u16 *Triangleptr,u16 *Lineptr,u16 *Pointptr) void IndexGenerator::Start(u16* Triangleptr, u16* Lineptr, u16* Pointptr)
{ {
Tptr = Triangleptr; Tptr = Triangleptr;
Lptr = Lineptr; Lptr = Lineptr;
@ -56,288 +53,116 @@ void IndexGenerator::Start(u16 *Triangleptr,u16 *Lineptr,u16 *Pointptr)
numT = 0; numT = 0;
numL = 0; numL = 0;
numP = 0; numP = 0;
Tadds = 0;
Ladds = 0;
Padds = 0;
LastTPrimitive = Prim_None;
LastLPrimitive = Prim_None;
} }
// Generates indices for one batch of `numVerts` vertices and then advances the
// running vertex index past the batch.
//
// primitive: selects the generator through the table below. The slot order is
//            quads (0), <no generator> (1), triangle list (2), triangle
//            strip (3), triangle fan (4), line list (5), line strip (6),
//            points (7).
// numVerts:  number of vertices in the batch.
//
// A primitive value that is outside the table, or that selects the empty
// slot, produces no indices instead of calling through an invalid pointer.
void IndexGenerator::AddIndices(int primitive, u32 numVerts)
{
	static void (*const primitive_table[])(u32) =
	{
		IndexGenerator::AddQuads,
		NULL,
		IndexGenerator::AddList,
		IndexGenerator::AddStrip,
		IndexGenerator::AddFan,
		IndexGenerator::AddLineList,
		IndexGenerator::AddLineStrip,
		IndexGenerator::AddPoints,
	};
	const int table_size = static_cast<int>(sizeof(primitive_table) / sizeof(primitive_table[0]));

	const bool in_range = primitive >= 0 && primitive < table_size;
	if (in_range && primitive_table[primitive] != NULL)
	{
		primitive_table[primitive](numVerts);
	}

	// The vertex index is advanced unconditionally, exactly as before.
	// NOTE(review): for an unsupported primitive this assumes the caller has
	// still placed numVerts vertices in the vertex buffer -- confirm.
	index += numVerts;
}
// Triangles // Triangles
void IndexGenerator::AddList(int numVerts) __forceinline void IndexGenerator::WriteTriangle(u32 index1, u32 index2, u32 index3)
{ {
//if we have no vertices return *Tptr++ = index1;
if(numVerts <= 0) return; *Tptr++ = index2;
int numTris = numVerts / 3; *Tptr++ = index3;
if (!numTris)
{ ++numT;
//if we have less than 3 verts
if(numVerts == 1)
{
// discard
index++;
return;
}
else
{
//we have two verts render a degenerated triangle
numTris = 1;
*Tptr++ = index;
*Tptr++ = index+1;
*Tptr++ = index;
}
}
else
{
for (int i = 0; i < numTris; i++)
{
*Tptr++ = index+i*3;
*Tptr++ = index+i*3+1;
*Tptr++ = index+i*3+2;
}
int baseRemainingverts = numVerts - numVerts % 3;
switch (numVerts % 3)
{
case 2:
//whe have 2 remaining verts use strip method
*Tptr++ = index + baseRemainingverts - 1;
*Tptr++ = index + baseRemainingverts;
*Tptr++ = index + baseRemainingverts + 1;
numTris++;
break;
case 1:
//whe have 1 remaining verts use strip method this is only a conjeture
*Tptr++ = index + baseRemainingverts - 2;
*Tptr++ = index + baseRemainingverts - 1;
*Tptr++ = index + baseRemainingverts;
numTris++;
break;
default:
break;
};
}
index += numVerts;
numT += numTris;
Tadds++;
LastTPrimitive = Prim_List;
} }
void IndexGenerator::AddStrip(int numVerts) void IndexGenerator::AddList(u32 const numVerts)
{ {
if(numVerts <= 0) return; auto const numTris = numVerts / 3;
int numTris = numVerts - 2; for (u32 i = 0; i != numTris; ++i)
if (numTris < 1)
{ {
//if we have less than 3 verts WriteTriangle(index + i * 3, index + i * 3 + 1, index + i * 3 + 2);
if(numVerts == 1)
{
// discard
index++;
return;
}
else
{
//we have two verts render a degenerated triangle
numTris = 1;
*Tptr++ = index;
*Tptr++ = index+1;
*Tptr++ = index;
}
} }
else
{
bool wind = false;
for (int i = 0; i < numTris; i++)
{
*Tptr++ = index+i;
*Tptr++ = index+i+(wind?2:1);
*Tptr++ = index+i+(wind?1:2);
wind = !wind;
}
}
index += numVerts;
numT += numTris;
Tadds++;
LastTPrimitive = Prim_Strip;
}
void IndexGenerator::AddFan(int numVerts)
{
if(numVerts <= 0) return;
int numTris = numVerts - 2;
if (numTris < 1)
{
//if we have less than 3 verts
if(numVerts == 1)
{
//Discard
index++;
return;
}
else
{
//we have two verts render a degenerated triangle
numTris = 1;
*Tptr++ = index;
*Tptr++ = index+1;
*Tptr++ = index;
}
}
else
{
for (int i = 0; i < numTris; i++)
{
*Tptr++ = index;
*Tptr++ = index+i+1;
*Tptr++ = index+i+2;
}
}
index += numVerts;
numT += numTris;
Tadds++;
LastTPrimitive = Prim_Fan;
} }
void IndexGenerator::AddQuads(int numVerts) void IndexGenerator::AddStrip(u32 const numVerts)
{ {
if(numVerts <= 0) return; bool wind = false;
int numTris = (numVerts/4)*2; for (u32 i = 2; i < numVerts; ++i)
if (numTris == 0)
{ {
//if we have less than 3 verts WriteTriangle(
if(numVerts == 1) index + i - 2,
{ index + i - !wind,
//discard index + i - wind);
index++;
return; wind ^= true;
}
else
{
if(numVerts == 2)
{
//we have two verts render a degenerated triangle
numTris = 1;
*Tptr++ = index;
*Tptr++ = index + 1;
*Tptr++ = index;
}
else
{
//we have 3 verts render a full triangle
numTris = 1;
*Tptr++ = index;
*Tptr++ = index + 1;
*Tptr++ = index + 2;
}
}
} }
else
{
for (int i = 0; i < numTris / 2; i++)
{
*Tptr++ = index+i*4;
*Tptr++ = index+i*4+1;
*Tptr++ = index+i*4+2;
*Tptr++ = index+i*4;
*Tptr++ = index+i*4+2;
*Tptr++ = index+i*4+3;
}
int baseRemainingverts = numVerts - numVerts % 4;
switch (numVerts % 4)
{
case 3:
//whe have 3 remaining verts use strip method
*Tptr++ = index + baseRemainingverts;
*Tptr++ = index + baseRemainingverts + 1;
*Tptr++ = index + baseRemainingverts + 2;
numTris++;
break;
case 2:
//whe have 2 remaining verts use strip method
*Tptr++ = index + baseRemainingverts - 1;
*Tptr++ = index + baseRemainingverts;
*Tptr++ = index + baseRemainingverts + 1;
numTris++;
break;
case 1:
//whe have 1 remaining verts use strip method this is only a conjeture
*Tptr++ = index + baseRemainingverts - 2;
*Tptr++ = index + baseRemainingverts - 1;
*Tptr++ = index + baseRemainingverts;
numTris++;
break;
default:
break;
};
}
index += numVerts;
numT += numTris;
Tadds++;
LastTPrimitive = Prim_List;
} }
void IndexGenerator::AddFan(u32 numVerts)
//Lines
void IndexGenerator::AddLineList(int numVerts)
{ {
if(numVerts <= 0) return; for (u32 i = 2; i < numVerts; ++i)
int numLines = numVerts / 2;
if (!numLines)
{ {
//Discard WriteTriangle(index, index + i - 1, index + i);
index++;
return;
} }
else
{
for (int i = 0; i < numLines; i++)
{
*Lptr++ = index+i*2;
*Lptr++ = index+i*2+1;
}
if((numVerts & 1) != 0)
{
//use line strip for remaining vert
*Lptr++ = index + numLines * 2 - 1;
*Lptr++ = index + numLines * 2;
}
}
index += numVerts;
numL += numLines;
Ladds++;
LastLPrimitive = Prim_List;
} }
void IndexGenerator::AddLineStrip(int numVerts) void IndexGenerator::AddQuads(u32 numVerts)
{ {
int numLines = numVerts - 1; auto const numQuads = numVerts / 4;
if (numLines <= 0) for (u32 i = 0; i != numQuads; ++i)
{ {
if(numVerts == 1) WriteTriangle(index + i * 4, index + i * 4 + 1, index + i * 4 + 2);
{ WriteTriangle(index + i * 4, index + i * 4 + 2, index + i * 4 + 3);
index++;
}
return;
} }
for (int i = 0; i < numLines; i++)
{
*Lptr++ = index+i;
*Lptr++ = index+i+1;
}
index += numVerts;
numL += numLines;
Ladds++;
LastLPrimitive = Prim_Strip;
} }
// Lines
void IndexGenerator::AddLineList(u32 numVerts)
//Points
void IndexGenerator::AddPoints(int numVerts)
{ {
for (int i = 0; i < numVerts; i++) auto const numLines = numVerts / 2;
for (u32 i = 0; i != numLines; ++i)
{ {
*Pptr++ = index+i; *Lptr++ = index + i * 2;
*Lptr++ = index + i * 2 + 1;
++numL;
}
}
void IndexGenerator::AddLineStrip(u32 numVerts)
{
for (u32 i = 1; i < numVerts; ++i)
{
*Lptr++ = index + i - 1;
*Lptr++ = index + i;
++numL;
}
}
// Points
void IndexGenerator::AddPoints(u32 numVerts)
{
for (u32 i = 0; i != numVerts; ++i)
{
*Pptr++ = index + i;
++numP;
} }
index += numVerts;
numP += numVerts;
Padds++;
} }

View File

@ -25,53 +25,58 @@
class IndexGenerator class IndexGenerator
{ {
public: public:
//Init // Init
static void Start(u16 *Triangleptr,u16 *Lineptr,u16 *Pointptr); static void Start(u16 *Triangleptr,u16 *Lineptr,u16 *Pointptr);
//Triangles
static void AddList(int numVerts);
static void AddStrip(int numVerts);
static void AddFan(int numVerts);
static void AddQuads(int numVerts);
//Lines
static void AddLineList(int numVerts);
static void AddLineStrip(int numVerts);
//Points
static void AddPoints(int numVerts);
//Interface
static int GetNumTriangles() {used = true; return numT;}
static int GetNumLines() {used = true;return numL;}
static int GetNumPoints() {used = true;return numP;}
static int GetNumVerts() {return index;} //returns numprimitives
static int GetNumAdds() {return Tadds + Ladds + Padds;}
static int GetTriangleindexLen() {return (int)(Tptr - BASETptr);}
static int GetLineindexLen() {return (int)(Lptr - BASELptr);}
static int GetPointindexLen() {return (int)(Pptr - BASEPptr);}
static void AddIndices(int primitive, u32 numVertices);
// Interface
static u32 GetNumTriangles() {return numT;}
static u32 GetNumLines() {return numL;}
static u32 GetNumPoints() {return numP;}
// returns numprimitives
static u32 GetNumVerts() {return index;}
static u32 GetTriangleindexLen() {return (u32)(Tptr - BASETptr);}
static u32 GetLineindexLen() {return (u32)(Lptr - BASELptr);}
static u32 GetPointindexLen() {return (u32)(Pptr - BASEPptr);}
/*
enum IndexPrimitiveType enum IndexPrimitiveType
{ {
Prim_None = 0, Prim_None = 0,
Prim_List, Prim_List,
Prim_Strip, Prim_Strip,
Prim_Fan Prim_Fan
} ; };
*/
private: private:
// Triangles
static void AddList(u32 numVerts);
static void AddStrip(u32 numVerts);
static void AddFan(u32 numVerts);
static void AddQuads(u32 numVerts);
// Lines
static void AddLineList(u32 numVerts);
static void AddLineStrip(u32 numVerts);
// Points
static void AddPoints(u32 numVerts);
static void WriteTriangle(u32 index1, u32 index2, u32 index3);
static u16 *Tptr; static u16 *Tptr;
static u16 *BASETptr; static u16 *BASETptr;
static u16 *Lptr; static u16 *Lptr;
static u16 *BASELptr; static u16 *BASELptr;
static u16 *Pptr; static u16 *Pptr;
static u16 *BASEPptr; static u16 *BASEPptr;
static int numT; // TODO: redundant variables
static int numL; static u32 numT;
static int numP; static u32 numL;
static int index; static u32 numP;
static int Tadds; static u32 index;
static int Ladds;
static int Padds;
static IndexPrimitiveType LastTPrimitive;
static IndexPrimitiveType LastLPrimitive;
static bool used;
}; };
#endif // _INDEXGENERATOR_H #endif // _INDEXGENERATOR_H

View File

@ -73,6 +73,10 @@ int colElements[2];
float posScale; float posScale;
float tcScale[8]; float tcScale[8];
// bbox must read vertex position, so convert it to this buffer
static float s_bbox_vertex_buffer[3];
static u8 *s_bbox_pCurBufferPointer_orig;
static const float fractionTable[32] = { static const float fractionTable[32] = {
1.0f / (1U << 0), 1.0f / (1U << 1), 1.0f / (1U << 2), 1.0f / (1U << 3), 1.0f / (1U << 0), 1.0f / (1U << 1), 1.0f / (1U << 2), 1.0f / (1U << 3),
1.0f / (1U << 4), 1.0f / (1U << 5), 1.0f / (1U << 6), 1.0f / (1U << 7), 1.0f / (1U << 4), 1.0f / (1U << 5), 1.0f / (1U << 6), 1.0f / (1U << 7),
@ -95,10 +99,21 @@ void LOADERDECL PosMtx_ReadDirect_UByte()
void LOADERDECL PosMtx_Write() void LOADERDECL PosMtx_Write()
{ {
*VertexManager::s_pCurBufferPointer++ = s_curposmtx; DataWrite<u8>(s_curposmtx);
*VertexManager::s_pCurBufferPointer++ = 0; DataWrite<u8>(0);
*VertexManager::s_pCurBufferPointer++ = 0; DataWrite<u8>(0);
*VertexManager::s_pCurBufferPointer++ = 0; DataWrite<u8>(0);
}
// Pipeline stage run before the position loader when bounding box tracking is
// in use. While the bounding box is active it remembers the real vertex buffer
// write position and points the write position at s_bbox_vertex_buffer.
void LOADERDECL UpdateBoundingBoxPrepare()
{
	if (PixelEngine::bbox_active)
	{
		// Big hack: by redirecting the output pointer to our own scratch buffer
		// we obtain a copy of the converted vertex position while still using
		// the same conversion functions as the non-bbox path.
		s_bbox_pCurBufferPointer_orig = VertexManager::s_pCurBufferPointer;
		VertexManager::s_pCurBufferPointer = reinterpret_cast<u8*>(s_bbox_vertex_buffer);
	}
}
void LOADERDECL UpdateBoundingBox() void LOADERDECL UpdateBoundingBox()
@ -106,12 +121,16 @@ void LOADERDECL UpdateBoundingBox()
if (!PixelEngine::bbox_active) if (!PixelEngine::bbox_active)
return; return;
// Truly evil hack, reading backwards from the write pointer. If we were writing to write-only // reset videodata pointer
// memory like we might have been with a D3D vertex buffer, this would have been a bad idea. VertexManager::s_pCurBufferPointer = s_bbox_pCurBufferPointer_orig;
float *data = (float *)(VertexManager::s_pCurBufferPointer - 12);
// copy vertex pointers
memcpy(VertexManager::s_pCurBufferPointer, s_bbox_vertex_buffer, 12);
VertexManager::s_pCurBufferPointer += 12;
// We must transform the just loaded point by the current world and projection matrix - in software. // We must transform the just loaded point by the current world and projection matrix - in software.
// Then convert to screen space and update the bounding box. // Then convert to screen space and update the bounding box.
float p[3] = {data[0], data[1], data[2]}; float p[3] = {s_bbox_vertex_buffer[0], s_bbox_vertex_buffer[1], s_bbox_vertex_buffer[2]};
const float *world_matrix = (float*)xfmem + MatrixIndexA.PosNormalMtxIdx * 4; const float *world_matrix = (float*)xfmem + MatrixIndexA.PosNormalMtxIdx * 4;
const float *proj_matrix = &g_fProjectionMatrix[0]; const float *proj_matrix = &g_fProjectionMatrix[0];
@ -149,24 +168,22 @@ void LOADERDECL TexMtx_ReadDirect_UByte()
void LOADERDECL TexMtx_Write_Float() void LOADERDECL TexMtx_Write_Float()
{ {
*(float*)VertexManager::s_pCurBufferPointer = (float)s_curtexmtx[s_texmtxwrite++]; DataWrite(float(s_curtexmtx[s_texmtxwrite++]));
VertexManager::s_pCurBufferPointer += 4;
} }
void LOADERDECL TexMtx_Write_Float2() void LOADERDECL TexMtx_Write_Float2()
{ {
((float*)VertexManager::s_pCurBufferPointer)[0] = 0; DataWrite(0.f);
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)s_curtexmtx[s_texmtxwrite++]; DataWrite(float(s_curtexmtx[s_texmtxwrite++]));
VertexManager::s_pCurBufferPointer += 8;
} }
void LOADERDECL TexMtx_Write_Float4() void LOADERDECL TexMtx_Write_Float4()
{ {
((float*)VertexManager::s_pCurBufferPointer)[0] = 0; DataWrite(0.f);
((float*)VertexManager::s_pCurBufferPointer)[1] = 0; DataWrite(0.f);
((float*)VertexManager::s_pCurBufferPointer)[2] = s_curtexmtx[s_texmtxwrite++]; DataWrite(float(s_curtexmtx[s_texmtxwrite++]));
((float*)VertexManager::s_pCurBufferPointer)[3] = 0; // Just to fill out with 0. // Just to fill out with 0.
VertexManager::s_pCurBufferPointer += 16; DataWrite(0.f);
} }
VertexLoader::VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr) VertexLoader::VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr)
@ -274,15 +291,16 @@ void VertexLoader::CompileVertexTranslator()
if (m_VtxDesc.Tex7MatIdx) {m_VertexSize += 1; m_NativeFmt->m_components |= VB_HAS_TEXMTXIDX7; WriteCall(TexMtx_ReadDirect_UByte); } if (m_VtxDesc.Tex7MatIdx) {m_VertexSize += 1; m_NativeFmt->m_components |= VB_HAS_TEXMTXIDX7; WriteCall(TexMtx_ReadDirect_UByte); }
// Write vertex position loader // Write vertex position loader
WriteCall(VertexLoader_Position::GetFunction(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements)); if(g_ActiveConfig.bUseBBox) {
WriteCall(UpdateBoundingBoxPrepare);
WriteCall(VertexLoader_Position::GetFunction(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements));
WriteCall(UpdateBoundingBox);
} else {
WriteCall(VertexLoader_Position::GetFunction(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements));
}
m_VertexSize += VertexLoader_Position::GetSize(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements); m_VertexSize += VertexLoader_Position::GetSize(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements);
nat_offset += 12; nat_offset += 12;
// OK, so we just got a point. Let's go back and read it for the bounding box.
if(g_ActiveConfig.bUseBBox)
WriteCall(UpdateBoundingBox);
// Normals // Normals
vtx_decl.num_normals = 0; vtx_decl.num_normals = 0;
if (m_VtxDesc.Normal != NOT_PRESENT) if (m_VtxDesc.Normal != NOT_PRESENT)
@ -507,7 +525,8 @@ void VertexLoader::WriteSetVariable(int bits, void *address, OpArg value)
#endif #endif
} }
#endif #endif
void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
int VertexLoader::SetupRunVertices(int vtx_attr_group, int primitive, int const count)
{ {
m_numLoadedVertices += count; m_numLoadedVertices += count;
@ -526,7 +545,7 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
{ {
// if cull mode is none, ignore triangles and quads // if cull mode is none, ignore triangles and quads
DataSkip(count * m_VertexSize); DataSkip(count * m_VertexSize);
return; return 0;
} }
m_NativeFmt->EnableComponents(m_NativeFmt->m_components); m_NativeFmt->EnableComponents(m_NativeFmt->m_components);
@ -550,157 +569,48 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
for (int i = 0; i < 2; i++) for (int i = 0; i < 2; i++)
colElements[i] = m_VtxAttr.color[i].Elements; colElements[i] = m_VtxAttr.color[i].Elements;
// if strips or fans, make sure all vertices can fit in buffer, otherwise flush VertexManager::PrepareForAdditionalData(primitive, count, native_stride);
int granularity = 1;
switch (primitive) {
case 3: // strip .. hm, weird
case 4: // fan
if (VertexManager::GetRemainingSize() < 3 * native_stride)
VertexManager::Flush();
break;
case 6: // line strip
if (VertexManager::GetRemainingSize() < 2 * native_stride)
VertexManager::Flush();
break;
case 0: granularity = 4; break; // quads
case 2: granularity = 3; break; // tris
case 5: granularity = 2; break; // lines
}
int startv = 0, extraverts = 0; return count;
int v = 0;
//int remainingVerts2 = VertexManager::GetRemainingVertices(primitive);
while (v < count)
{
int remainingVerts = VertexManager::GetRemainingSize() / native_stride;
//if (remainingVerts2 - v + startv < remainingVerts)
//remainingVerts = remainingVerts2 - v + startv;
if (remainingVerts < granularity) {
INCSTAT(stats.thisFrame.numBufferSplits);
// This buffer full - break current primitive and flush, to switch to the next buffer.
u8* plastptr = VertexManager::s_pCurBufferPointer;
if (v - startv > 0)
VertexManager::AddVertices(primitive, v - startv + extraverts);
VertexManager::Flush();
//remainingVerts2 = VertexManager::GetRemainingVertices(primitive);
// Why does this need to be so complicated?
switch (primitive) {
case 3: // triangle strip, copy last two vertices
// a little trick since we have to keep track of signs
if (v & 1) {
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-2*native_stride, native_stride);
memcpy_gc(VertexManager::s_pCurBufferPointer+native_stride, plastptr-native_stride*2, 2*native_stride);
VertexManager::s_pCurBufferPointer += native_stride*3;
extraverts = 3;
}
else {
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride*2, native_stride*2);
VertexManager::s_pCurBufferPointer += native_stride*2;
extraverts = 2;
}
break;
case 4: // tri fan, copy first and last vert
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride*(v-startv+extraverts), native_stride);
VertexManager::s_pCurBufferPointer += native_stride;
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride, native_stride);
VertexManager::s_pCurBufferPointer += native_stride;
extraverts = 2;
break;
case 6: // line strip
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride, native_stride);
VertexManager::s_pCurBufferPointer += native_stride;
extraverts = 1;
break;
default:
extraverts = 0;
break;
}
startv = v;
}
int remainingPrims = remainingVerts / granularity;
remainingVerts = remainingPrims * granularity;
if (count - v < remainingVerts)
remainingVerts = count - v;
#ifdef USE_JIT
if (remainingVerts > 0) {
loop_counter = remainingVerts;
((void (*)())(void*)m_compiledCode)();
}
#else
for (int s = 0; s < remainingVerts; s++)
{
tcIndex = 0;
colIndex = 0;
s_texmtxwrite = s_texmtxread = 0;
for (int i = 0; i < m_numPipelineStages; i++)
m_PipelineStages[i]();
PRIM_LOG("\n");
}
#endif
v += remainingVerts;
}
if (startv < count)
VertexManager::AddVertices(primitive, count - startv + extraverts);
} }
void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int const count)
void VertexLoader::RunCompiledVertices(int vtx_attr_group, int primitive, int count, u8* Data)
{ {
m_numLoadedVertices += count; auto const new_count = SetupRunVertices(vtx_attr_group, primitive, count);
ConvertVertices(new_count);
// Flush if our vertex format is different from the currently set. VertexManager::AddVertices(primitive, new_count);
if (g_nativeVertexFmt != NULL && g_nativeVertexFmt != m_NativeFmt)
{
// We really must flush here. It's possible that the native representations
// of the two vtx formats are the same, but we have no way to easily check that
// now.
VertexManager::Flush();
// Also move the Set() here?
}
g_nativeVertexFmt = m_NativeFmt;
if (bpmem.genMode.cullmode == 3 && primitive < 5)
{
// if cull mode is none, ignore triangles and quads
DataSkip(count * m_VertexSize);
return;
}
m_NativeFmt->EnableComponents(m_NativeFmt->m_components);
// Load position and texcoord scale factors.
m_VtxAttr.PosFrac = g_VtxAttr[vtx_attr_group].g0.PosFrac;
m_VtxAttr.texCoord[0].Frac = g_VtxAttr[vtx_attr_group].g0.Tex0Frac;
m_VtxAttr.texCoord[1].Frac = g_VtxAttr[vtx_attr_group].g1.Tex1Frac;
m_VtxAttr.texCoord[2].Frac = g_VtxAttr[vtx_attr_group].g1.Tex2Frac;
m_VtxAttr.texCoord[3].Frac = g_VtxAttr[vtx_attr_group].g1.Tex3Frac;
m_VtxAttr.texCoord[4].Frac = g_VtxAttr[vtx_attr_group].g2.Tex4Frac;
m_VtxAttr.texCoord[5].Frac = g_VtxAttr[vtx_attr_group].g2.Tex5Frac;
m_VtxAttr.texCoord[6].Frac = g_VtxAttr[vtx_attr_group].g2.Tex6Frac;
m_VtxAttr.texCoord[7].Frac = g_VtxAttr[vtx_attr_group].g2.Tex7Frac;
pVtxAttr = &m_VtxAttr;
posScale = fractionTable[m_VtxAttr.PosFrac];
if (m_NativeFmt->m_components & VB_HAS_UVALL)
for (int i = 0; i < 8; i++)
tcScale[i] = fractionTable[m_VtxAttr.texCoord[i].Frac];
for (int i = 0; i < 2; i++)
colElements[i] = m_VtxAttr.color[i].Elements;
if(VertexManager::GetRemainingSize() < native_stride * count)
VertexManager::Flush();
memcpy_gc(VertexManager::s_pCurBufferPointer, Data, native_stride * count);
VertexManager::s_pCurBufferPointer += native_stride * count;
DataSkip(count * m_VertexSize);
VertexManager::AddVertices(primitive, count);
} }
// Runs the vertex translation for `count` vertices.
//
// With USE_JIT the compiled loader is invoked once with loop_counter set to
// `count` (and not at all when count is not positive). Otherwise every
// pipeline stage is called in order once per vertex, with the per-vertex
// colour / texcoord / texture-matrix counters reset first.
void VertexLoader::ConvertVertices(int count)
{
#ifdef USE_JIT
	if (count <= 0)
		return;

	typedef void (*CompiledLoader)();
	loop_counter = count;
	CompiledLoader const run_compiled = (CompiledLoader)(void*)m_compiledCode;
	run_compiled();
#else
	for (int remaining = count; remaining > 0; --remaining)
	{
		// Per-vertex state consumed by the pipeline stages.
		tcIndex = 0;
		colIndex = 0;
		s_texmtxwrite = s_texmtxread = 0;

		for (int stage = 0; stage < m_numPipelineStages; ++stage)
		{
			m_PipelineStages[stage]();
		}
		PRIM_LOG("\n");
	}
#endif
}
// Submits vertices that are already in native format.
//
// After the common setup (which returns how many vertices to process), the
// bytes at `Data` are copied straight into the vertex buffer, the matching
// source bytes in the video data stream are skipped, and the vertices are
// registered with the vertex manager.
void VertexLoader::RunCompiledVertices(int vtx_attr_group, int primitive, int const count, u8* Data)
{
	auto const vertex_count = SetupRunVertices(vtx_attr_group, primitive, count);
	auto const native_bytes = native_stride * vertex_count;

	// Copy the pre-converted vertices and move the write position past them.
	memcpy_gc(VertexManager::s_pCurBufferPointer, Data, native_bytes);
	VertexManager::s_pCurBufferPointer += native_bytes;

	// The source-format vertices are not decoded, only consumed.
	DataSkip(vertex_count * m_VertexSize);

	VertexManager::AddVertices(primitive, vertex_count);
}
void VertexLoader::SetVAT(u32 _group0, u32 _group1, u32 _group2) void VertexLoader::SetVAT(u32 _group0, u32 _group1, u32 _group2)
{ {

View File

@ -88,6 +88,8 @@ public:
~VertexLoader(); ~VertexLoader();
int GetVertexSize() const {return m_VertexSize;} int GetVertexSize() const {return m_VertexSize;}
int SetupRunVertices(int vtx_attr_group, int primitive, int const count);
void RunVertices(int vtx_attr_group, int primitive, int count); void RunVertices(int vtx_attr_group, int primitive, int count);
void RunCompiledVertices(int vtx_attr_group, int primitive, int count, u8* Data); void RunCompiledVertices(int vtx_attr_group, int primitive, int count, u8* Data);
@ -124,6 +126,7 @@ private:
void SetVAT(u32 _group0, u32 _group1, u32 _group2); void SetVAT(u32 _group0, u32 _group1, u32 _group2);
void CompileVertexTranslator(); void CompileVertexTranslator();
void ConvertVertices(int count);
void WriteCall(TPipelineFunction); void WriteCall(TPipelineFunction);

View File

@ -15,9 +15,6 @@
// Official SVN repository and contact information can be found at // Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/ // http://code.google.com/p/dolphin-emu/
#ifndef _VERTEXLOADERCOLOR_H
#define _VERTEXLOADERCOLOR_H
#include "Common.h" #include "Common.h"
#include "VideoCommon.h" #include "VideoCommon.h"
#include "LookUpTables.h" #include "LookUpTables.h"
@ -37,8 +34,7 @@ extern int colElements[2];
__forceinline void _SetCol(u32 val) __forceinline void _SetCol(u32 val)
{ {
*(u32*)VertexManager::s_pCurBufferPointer = val; DataWrite(val);
VertexManager::s_pCurBufferPointer += 4;
colIndex++; colIndex++;
} }
@ -132,80 +128,65 @@ void LOADERDECL Color_ReadDirect_32b_8888()
_SetCol(col); _SetCol(col);
} }
template <typename I>
void Color_ReadIndex_16b_565()
void LOADERDECL Color_ReadIndex8_16b_565()
{ {
u8 Index = DataReadU8(); auto const Index = DataRead<I>();
u16 val = Common::swap16(*(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]))); u16 val = Common::swap16(*(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex])));
_SetCol565(val); _SetCol565(val);
} }
void LOADERDECL Color_ReadIndex8_24b_888()
template <typename I>
void Color_ReadIndex_24b_888()
{ {
u8 Index = DataReadU8(); auto const Index = DataRead<I>();
const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]); const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
_SetCol(_Read24(iAddress)); _SetCol(_Read24(iAddress));
} }
void LOADERDECL Color_ReadIndex8_32b_888x()
template <typename I>
void Color_ReadIndex_32b_888x()
{ {
u8 Index = DataReadU8(); auto const Index = DataRead<I>();
const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]); const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
_SetCol(_Read24(iAddress)); _SetCol(_Read24(iAddress));
} }
void LOADERDECL Color_ReadIndex8_16b_4444()
template <typename I>
void Color_ReadIndex_16b_4444()
{ {
u8 Index = DataReadU8(); auto const Index = DataRead<I>();
u16 val = *(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex])); u16 val = *(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]));
_SetCol4444(val); _SetCol4444(val);
} }
void LOADERDECL Color_ReadIndex8_24b_6666()
template <typename I>
void Color_ReadIndex_24b_6666()
{ {
u8 Index = DataReadU8(); auto const Index = DataRead<I>();
const u8* pData = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]) - 1; const u8* pData = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]) - 1;
u32 val = Common::swap32(pData); u32 val = Common::swap32(pData);
_SetCol6666(val); _SetCol6666(val);
} }
void LOADERDECL Color_ReadIndex8_32b_8888()
template <typename I>
void Color_ReadIndex_32b_8888()
{ {
u8 Index = DataReadU8(); auto const Index = DataRead<I>();
const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]); const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
_SetCol(_Read32(iAddress)); _SetCol(_Read32(iAddress));
} }
void LOADERDECL Color_ReadIndex16_16b_565()
{ void LOADERDECL Color_ReadIndex8_16b_565() { Color_ReadIndex_16b_565<u8>(); }
u16 Index = DataReadU16(); void LOADERDECL Color_ReadIndex8_24b_888() { Color_ReadIndex_24b_888<u8>(); }
u16 val = Common::swap16(*(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]))); void LOADERDECL Color_ReadIndex8_32b_888x() { Color_ReadIndex_32b_888x<u8>(); }
_SetCol565(val); void LOADERDECL Color_ReadIndex8_16b_4444() { Color_ReadIndex_16b_4444<u8>(); }
} void LOADERDECL Color_ReadIndex8_24b_6666() { Color_ReadIndex_24b_6666<u8>(); }
void LOADERDECL Color_ReadIndex16_24b_888() void LOADERDECL Color_ReadIndex8_32b_8888() { Color_ReadIndex_32b_8888<u8>(); }
{
u16 Index = DataReadU16(); void LOADERDECL Color_ReadIndex16_16b_565() { Color_ReadIndex_16b_565<u16>(); }
const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]); void LOADERDECL Color_ReadIndex16_24b_888() { Color_ReadIndex_24b_888<u16>(); }
_SetCol(_Read24(iAddress)); void LOADERDECL Color_ReadIndex16_32b_888x() { Color_ReadIndex_32b_888x<u16>(); }
} void LOADERDECL Color_ReadIndex16_16b_4444() { Color_ReadIndex_16b_4444<u16>(); }
void LOADERDECL Color_ReadIndex16_32b_888x() void LOADERDECL Color_ReadIndex16_24b_6666() { Color_ReadIndex_24b_6666<u16>(); }
{ void LOADERDECL Color_ReadIndex16_32b_8888() { Color_ReadIndex_32b_8888<u16>(); }
u16 Index = DataReadU16();
const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
_SetCol(_Read24(iAddress));
}
void LOADERDECL Color_ReadIndex16_16b_4444()
{
u16 Index = DataReadU16();
u16 val = *(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]));
_SetCol4444(val);
}
void LOADERDECL Color_ReadIndex16_24b_6666()
{
u16 Index = DataReadU16();
const u8 *pData = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]) - 1;
u32 val = Common::swap32(pData);
_SetCol6666(val);
}
void LOADERDECL Color_ReadIndex16_32b_8888()
{
u16 Index = DataReadU16();
const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
_SetCol(_Read32(iAddress));
}
#endif

View File

@ -22,6 +22,7 @@
#include "VertexManagerBase.h" #include "VertexManagerBase.h"
#include "CPUDetect.h" #include "CPUDetect.h"
#include <cmath> #include <cmath>
#include <limits>
#if _M_SSE >= 0x401 #if _M_SSE >= 0x401
#include <smmintrin.h> #include <smmintrin.h>
@ -30,78 +31,163 @@
#include <tmmintrin.h> #include <tmmintrin.h>
#endif #endif
// warning: mapping buffer should be disabled to use this
#define LOG_NORM() // PRIM_LOG("norm: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-3], ((float*)VertexManager::s_pCurBufferPointer)[-2], ((float*)VertexManager::s_pCurBufferPointer)[-1]); #define LOG_NORM() // PRIM_LOG("norm: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-3], ((float*)VertexManager::s_pCurBufferPointer)[-2], ((float*)VertexManager::s_pCurBufferPointer)[-1]);
VertexLoader_Normal::Set VertexLoader_Normal::m_Table[NUM_NRM_TYPE][NUM_NRM_INDICES][NUM_NRM_ELEMENTS][NUM_NRM_FORMAT]; VertexLoader_Normal::Set VertexLoader_Normal::m_Table[NUM_NRM_TYPE][NUM_NRM_INDICES][NUM_NRM_ELEMENTS][NUM_NRM_FORMAT];
namespace
{
// Converts an integer normal component to float by dividing by
// 2^(bit width of T - 1 - (1 if T is signed)):
//   s8 -> 1/64, u8 -> 1/128, s16 -> 1/16384, u16 -> 1/32768.
// The float specialization bypasses this scaling entirely.
// TODO: is this right?
template <typename T>
__forceinline float FracAdjust(T val)
{
	auto const fraction_bits = sizeof(T) * 8 - std::numeric_limits<T>::is_signed - 1;
	return val / float(1u << fraction_bits);
}
// Float components need no fixed-point scaling: hand the value back unchanged.
template <>
__forceinline float FracAdjust(float val)
{
	return val;
}
// Converts N big-endian components of type T starting at `data`:
// each one is byte-swapped, scaled by FracAdjust and handed to DataWrite
// (presumably appending to the output vertex buffer - confirm in DataReader.h).
// N must be 3 (one vector) or 9 (normal + binormal + tangent).
// NOTE(review): for T = float the still-swapped bytes are loaded as a float
// value before Common::FromBigEndian swaps them - confirm that is safe on
// every target FPU.
template <typename T, int N>
__forceinline void ReadIndirect(const T* data)
{
	static_assert(3 == N || 9 == N, "N is only sane as 3 or 9!");

	const T* const end = data + N;
	for (const T* component = data; component != end; ++component)
	{
		DataWrite(FracAdjust(Common::FromBigEndian(*component)));
	}

	LOG_NORM();
}
// Loader for normals stored inline in the vertex stream:
// N vectors of three components of type T each.
template <typename T, int N>
struct Normal_Direct
{
	// Number of input bytes consumed by one call to function().
	static const int size = sizeof(T) * N * 3;

	// Converts the N * 3 components at the current read position,
	// then steps the read position past them.
	static void LOADERDECL function()
	{
		ReadIndirect<T, N * 3>(reinterpret_cast<const T*>(DataGetPosition()));
		DataSkip<N * 3 * sizeof(T)>();
	}
};
// Reads one index of type I from the vertex stream and converts N * 3
// components of type T from the normal-array entry it selects, starting
// `Offset` vectors (Offset * 3 components) into that entry.
template <typename I, typename T, int N, int Offset>
__forceinline void Normal_Index_Offset()
{
	static_assert(!std::numeric_limits<I>::is_signed, "Only unsigned I is sane!");

	auto const index = DataRead<I>();
	const u8* const entry = cached_arraybases[ARRAY_NORMAL]
		+ (index * arraystrides[ARRAY_NORMAL]);

	ReadIndirect<T, N * 3>(reinterpret_cast<const T*>(entry + sizeof(T) * 3 * Offset));
}
// Loader for indexed normals where a single index of type I selects
// all N vectors (N * 3 components of type T) of the array entry.
template <typename I, typename T, int N>
struct Normal_Index
{
	// Only the index itself is read from the vertex stream.
	static const int size = sizeof(I);

	static void LOADERDECL function()
	{
		Normal_Index_Offset<I, T, N, 0>();
	}
};
// Loader for indexed NBT normals that carry three separate indices:
// the k-th index (k = 0, 1, 2) selects an array entry and the k-th
// vector of three T components inside that entry is converted.
template <typename I, typename T>
struct Normal_Index_Indices3
{
	// Three indices of type I are read from the vertex stream.
	static const int size = sizeof(I) * 3;

	static void LOADERDECL function()
	{
		Normal_Index_Offset<I, T, 1, 0>(); // first index, vector 0
		Normal_Index_Offset<I, T, 1, 1>(); // second index, vector 1
		Normal_Index_Offset<I, T, 1, 2>(); // third index, vector 2
	}
};
}
void VertexLoader_Normal::Init(void) void VertexLoader_Normal::Init(void)
{ {
// HACK is for signed instead of unsigned to prevent crashes. m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Normal_Direct<u8, 1>();
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Set(3, Normal_DirectByte); //HACK m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Normal_Direct<s8, 1>();
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Set(3, Normal_DirectByte); m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Normal_Direct<u16, 1>();
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Set(6, Normal_DirectShort); //HACK m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Normal_Direct<s16, 1>();
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Set(6, Normal_DirectShort); m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Normal_Direct<float, 1>();
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Set(12, Normal_DirectFloat); m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Normal_Direct<u8, 3>();
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Set(9, Normal_DirectByte3); //HACK m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Normal_Direct<s8, 3>();
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Set(9, Normal_DirectByte3); m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Normal_Direct<u16, 3>();
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Set(18, Normal_DirectShort3); //HACK m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Normal_Direct<s16, 3>();
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Set(18, Normal_DirectShort3); m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Normal_Direct<float, 3>();
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Set(36, Normal_DirectFloat3);
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Set(3, Normal_DirectByte); //HACK // Same as above
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Set(3, Normal_DirectByte); m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Normal_Direct<u8, 1>();
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Set(6, Normal_DirectShort); //HACK m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Normal_Direct<s8, 1>();
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Set(6, Normal_DirectShort); m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Normal_Direct<u16, 1>();
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Set(12, Normal_DirectFloat); m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Normal_Direct<s16, 1>();
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Set(9, Normal_DirectByte3); //HACK m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Normal_Direct<float, 1>();
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Set(9, Normal_DirectByte3); m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Normal_Direct<u8, 3>();
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Set(18, Normal_DirectShort3); //HACK m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Normal_Direct<s8, 3>();
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Set(18, Normal_DirectShort3); m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Direct<u16, 3>();
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Set(36, Normal_DirectFloat3); m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Direct<s16, 3>();
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Direct<float, 3>();
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Set(1, Normal_Index8_Byte); //HACK m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Normal_Index<u8, u8, 1>();
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Set(1, Normal_Index8_Byte); m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Normal_Index<u8, s8, 1>();
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Set(1, Normal_Index8_Short); //HACK m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Normal_Index<u8, u16, 1>();
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Set(1, Normal_Index8_Short); m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Normal_Index<u8, s16, 1>();
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Set(1, Normal_Index8_Float); m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Normal_Index<u8, float, 1>();
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Set(1, Normal_Index8_Byte3_Indices1); //HACK m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Normal_Index<u8, u8, 3>();
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Set(1, Normal_Index8_Byte3_Indices1); m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Normal_Index<u8, s8, 3>();
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Set(1, Normal_Index8_Short3_Indices1); //HACK m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Normal_Index<u8, u16, 3>();
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Set(1, Normal_Index8_Short3_Indices1); m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Normal_Index<u8, s16, 3>();
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Set(1, Normal_Index8_Float3_Indices1); m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Normal_Index<u8, float, 3>();
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Set(1, Normal_Index8_Byte); //HACK // Same as above for NRM_NBT
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Set(1, Normal_Index8_Byte); m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Normal_Index<u8, u8, 1>();
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Set(1, Normal_Index8_Short); //HACK m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Normal_Index<u8, s8, 1>();
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Set(1, Normal_Index8_Short); m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Normal_Index<u8, u16, 1>();
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Set(1, Normal_Index8_Float); m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Normal_Index<u8, s16, 1>();
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Set(3, Normal_Index8_Byte3_Indices3); //HACK m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Normal_Index<u8, float, 1>();
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Set(3, Normal_Index8_Byte3_Indices3); m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Normal_Index_Indices3<u8, u8>();
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Set(3, Normal_Index8_Short3_Indices3); //HACK m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Normal_Index_Indices3<u8, s8>();
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Set(3, Normal_Index8_Short3_Indices3); m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Index_Indices3<u8, u16>();
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Set(3, Normal_Index8_Float3_Indices3); m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Index_Indices3<u8, s16>();
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Index_Indices3<u8, float>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Set(2, Normal_Index16_Byte); //HACK m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Normal_Index<u16, u8, 1>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Set(2, Normal_Index16_Byte); m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Normal_Index<u16, s8, 1>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Set(2, Normal_Index16_Short); //HACK m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Normal_Index<u16, u16, 1>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Set(2, Normal_Index16_Short); m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Normal_Index<u16, s16, 1>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Set(2, Normal_Index16_Float); m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Normal_Index<u16, float, 1>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Set(2, Normal_Index16_Byte3_Indices1); //HACK m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Normal_Index<u16, u8, 3>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Set(2, Normal_Index16_Byte3_Indices1); m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Normal_Index<u16, s8, 3>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Set(2, Normal_Index16_Short3_Indices1); //HACK m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Normal_Index<u16, u16, 3>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Set(2, Normal_Index16_Short3_Indices1); m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Normal_Index<u16, s16, 3>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Set(2, Normal_Index16_Float3_Indices1); m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Normal_Index<u16, float, 3>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Set(2, Normal_Index16_Byte); //HACK // Same as above for NRM_NBT
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Set(2, Normal_Index16_Byte); m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Normal_Index<u16, u8, 1>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Set(2, Normal_Index16_Short); //HACK m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Normal_Index<u16, s8, 1>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Set(2, Normal_Index16_Short); m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Normal_Index<u16, u16, 1>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Set(2, Normal_Index16_Float); m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Normal_Index<u16, s16, 1>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Set(6, Normal_Index16_Byte3_Indices3); //HACK m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Normal_Index<u16, float, 1>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Set(6, Normal_Index16_Byte3_Indices3); m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Normal_Index_Indices3<u16, u8>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Set(6, Normal_Index16_Short3_Indices3); //HACK m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Normal_Index_Indices3<u16, s8>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Set(6, Normal_Index16_Short3_Indices3); m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Index_Indices3<u16, u16>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Set(6, Normal_Index16_Float3_Indices3); m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Index_Indices3<u16, s16>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Index_Indices3<u16, float>();
} }
unsigned int VertexLoader_Normal::GetSize(unsigned int _type, unsigned int VertexLoader_Normal::GetSize(unsigned int _type,
@ -116,312 +202,3 @@ TPipelineFunction VertexLoader_Normal::GetFunction(unsigned int _type,
TPipelineFunction pFunc = m_Table[_type][_index3][_elements][_format].function; TPipelineFunction pFunc = m_Table[_type][_index3][_elements][_format].function;
return pFunc; return pFunc;
} }
// These fracs are fixed according to format.
// The macros deliberately carry no trailing semicolon, so they can be used
// anywhere inside an expression and not only as the last token of a statement.
#define S8FRAC 0.015625f // 1.0f / (1U << 6)
#define S16FRAC 0.00006103515625f // 1.0f / (1U << 14)
// --- Direct ---
// Writes the three s8 components at pData, each scaled by S8FRAC, as floats
// at VertexManager::s_pCurBufferPointer and advances that pointer by 12 bytes.
inline void ReadIndirectS8x3(const s8* pData)
{
	float* const out = reinterpret_cast<float*>(VertexManager::s_pCurBufferPointer);
	for (int c = 0; c != 3; ++c)
	{
		out[c] = pData[c] * S8FRAC;
	}
	VertexManager::s_pCurBufferPointer += 12;
	LOG_NORM();
}
inline void ReadIndirectS8x9(const s8* pData)
{
((float*)VertexManager::s_pCurBufferPointer)[0] = pData[0] * S8FRAC;
((float*)VertexManager::s_pCurBufferPointer)[1] = pData[1] * S8FRAC;
((float*)VertexManager::s_pCurBufferPointer)[2] = pData[2] * S8FRAC;
LOG_NORM();
((float*)VertexManager::s_pCurBufferPointer)[3] = pData[3] * S8FRAC;
((float*)VertexManager::s_pCurBufferPointer)[4] = pData[4] * S8FRAC;
((float*)VertexManager::s_pCurBufferPointer)[5] = pData[5] * S8FRAC;
LOG_NORM();
((float*)VertexManager::s_pCurBufferPointer)[6] = pData[6] * S8FRAC;
((float*)VertexManager::s_pCurBufferPointer)[7] = pData[7] * S8FRAC;
((float*)VertexManager::s_pCurBufferPointer)[8] = pData[8] * S8FRAC;
LOG_NORM();
VertexManager::s_pCurBufferPointer += 36;
}
inline void ReadIndirectS16x3(const u16* pData)
{
((float*)VertexManager::s_pCurBufferPointer)[0] = ((s16)Common::swap16(pData[0])) * S16FRAC;
((float*)VertexManager::s_pCurBufferPointer)[1] = ((s16)Common::swap16(pData[1])) * S16FRAC;
((float*)VertexManager::s_pCurBufferPointer)[2] = ((s16)Common::swap16(pData[2])) * S16FRAC;
VertexManager::s_pCurBufferPointer += 12;
LOG_NORM()
}
inline void ReadIndirectS16x9(const u16* pData)
{
((float*)VertexManager::s_pCurBufferPointer)[0] = ((s16)Common::swap16(pData[0])) * S16FRAC;
((float*)VertexManager::s_pCurBufferPointer)[1] = ((s16)Common::swap16(pData[1])) * S16FRAC;
((float*)VertexManager::s_pCurBufferPointer)[2] = ((s16)Common::swap16(pData[2])) * S16FRAC;
LOG_NORM()
((float*)VertexManager::s_pCurBufferPointer)[3] = ((s16)Common::swap16(pData[3])) * S16FRAC;
((float*)VertexManager::s_pCurBufferPointer)[4] = ((s16)Common::swap16(pData[4])) * S16FRAC;
((float*)VertexManager::s_pCurBufferPointer)[5] = ((s16)Common::swap16(pData[5])) * S16FRAC;
LOG_NORM()
((float*)VertexManager::s_pCurBufferPointer)[6] = ((s16)Common::swap16(pData[6])) * S16FRAC;
((float*)VertexManager::s_pCurBufferPointer)[7] = ((s16)Common::swap16(pData[7])) * S16FRAC;
((float*)VertexManager::s_pCurBufferPointer)[8] = ((s16)Common::swap16(pData[8])) * S16FRAC;
LOG_NORM()
VertexManager::s_pCurBufferPointer += 36;
}
inline void ReadIndirectFloatx3(const u32* pData)
{
((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]);
((u32*)VertexManager::s_pCurBufferPointer)[1] = Common::swap32(pData[1]);
((u32*)VertexManager::s_pCurBufferPointer)[2] = Common::swap32(pData[2]);
VertexManager::s_pCurBufferPointer += 12;
LOG_NORM();
}
inline void ReadIndirectFloatx9(const u32* pData)
{
((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]);
((u32*)VertexManager::s_pCurBufferPointer)[1] = Common::swap32(pData[1]);
((u32*)VertexManager::s_pCurBufferPointer)[2] = Common::swap32(pData[2]);
LOG_NORM();
((u32*)VertexManager::s_pCurBufferPointer)[3] = Common::swap32(pData[3]);
((u32*)VertexManager::s_pCurBufferPointer)[4] = Common::swap32(pData[4]);
((u32*)VertexManager::s_pCurBufferPointer)[5] = Common::swap32(pData[5]);
LOG_NORM();
((u32*)VertexManager::s_pCurBufferPointer)[6] = Common::swap32(pData[6]);
((u32*)VertexManager::s_pCurBufferPointer)[7] = Common::swap32(pData[7]);
((u32*)VertexManager::s_pCurBufferPointer)[8] = Common::swap32(pData[8]);
LOG_NORM();
VertexManager::s_pCurBufferPointer += 36;
}
inline void ReadDirectS8x3()
{
const s8* Source = (const s8*)DataGetPosition();
ReadIndirectS8x3(Source);
DataSkip(3);
}
inline void ReadDirectS8x9()
{
const s8* Source = (const s8*)DataGetPosition();
ReadIndirectS8x9(Source);
DataSkip(9);
}
inline void ReadDirectS16x3()
{
const u16* Source = (const u16*)DataGetPosition();
ReadIndirectS16x3(Source);
DataSkip(6);
}
inline void ReadDirectS16x9()
{
const u16* Source = (const u16*)DataGetPosition();
ReadIndirectS16x9(Source);
DataSkip(18);
}
inline void ReadDirectFloatx3()
{
const u32* Source = (const u32*)DataGetPosition();
ReadIndirectFloatx3(Source);
DataSkip(12);
}
inline void ReadDirectFloatx9()
{
const u32* Source = (const u32*)DataGetPosition();
ReadIndirectFloatx9(Source);
DataSkip(36);
}
// Pipeline function: inline normal, three 8-bit components.
void LOADERDECL VertexLoader_Normal::Normal_DirectByte() { ReadDirectS8x3(); }
// Pipeline function: inline normal, three 16-bit components.
void LOADERDECL VertexLoader_Normal::Normal_DirectShort() { ReadDirectS16x3(); }
// Pipeline function: inline normal, three 32-bit float components.
void LOADERDECL VertexLoader_Normal::Normal_DirectFloat() { ReadDirectFloatx3(); }
// Pipeline function: inline normal/binormal/tangent, nine 8-bit components.
void LOADERDECL VertexLoader_Normal::Normal_DirectByte3() { ReadDirectS8x9(); }
// Pipeline function: inline normal/binormal/tangent, nine 16-bit components.
void LOADERDECL VertexLoader_Normal::Normal_DirectShort3() { ReadDirectS16x9(); }
// Pipeline function: inline normal/binormal/tangent, nine 32-bit float components.
void LOADERDECL VertexLoader_Normal::Normal_DirectFloat3() { ReadDirectFloatx9(); }
// --- Index8 ---
// Reads an 8-bit index from the vertex stream and converts the three 8-bit
// components of the normal-array entry it selects.
void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte()
{
	const u8 idx = DataReadU8();
	const u8* const entry = cached_arraybases[ARRAY_NORMAL] + (idx * arraystrides[ARRAY_NORMAL]);
	ReadIndirectS8x3(reinterpret_cast<const s8*>(entry));
}
// Reads an 8-bit index from the vertex stream and converts the three 16-bit
// components of the normal-array entry it selects.
void LOADERDECL VertexLoader_Normal::Normal_Index8_Short()
{
	const u8 idx = DataReadU8();
	const u8* const entry = cached_arraybases[ARRAY_NORMAL] + (idx * arraystrides[ARRAY_NORMAL]);
	ReadIndirectS16x3(reinterpret_cast<const u16*>(entry));
}
// Reads an 8-bit index from the vertex stream and copies the three 32-bit
// float components of the normal-array entry it selects.
void LOADERDECL VertexLoader_Normal::Normal_Index8_Float()
{
	const u8 idx = DataReadU8();
	const u8* const entry = cached_arraybases[ARRAY_NORMAL] + (idx * arraystrides[ARRAY_NORMAL]);
	ReadIndirectFloatx3(reinterpret_cast<const u32*>(entry));
}
// Reads a single 8-bit index and converts all nine 8-bit components
// (three vectors) of the normal-array entry it selects.
void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte3_Indices1()
{
	const u8 idx = DataReadU8();
	const u8* const entry = cached_arraybases[ARRAY_NORMAL] + (idx * arraystrides[ARRAY_NORMAL]);
	ReadIndirectS8x9(reinterpret_cast<const s8*>(entry));
}
// Reads a single 8-bit index and converts all nine 16-bit components
// (three vectors) of the normal-array entry it selects.
void LOADERDECL VertexLoader_Normal::Normal_Index8_Short3_Indices1()
{
	const u8 idx = DataReadU8();
	const u8* const entry = cached_arraybases[ARRAY_NORMAL] + (idx * arraystrides[ARRAY_NORMAL]);
	ReadIndirectS16x9(reinterpret_cast<const u16*>(entry));
}
// Reads a single 8-bit index and copies all nine 32-bit float components
// (three vectors) of the normal-array entry it selects.
void LOADERDECL VertexLoader_Normal::Normal_Index8_Float3_Indices1()
{
	const u8 idx = DataReadU8();
	const u8* const entry = cached_arraybases[ARRAY_NORMAL] + (idx * arraystrides[ARRAY_NORMAL]);
	ReadIndirectFloatx9(reinterpret_cast<const u32*>(entry));
}
// Reads three 8-bit indices. The k-th index selects a normal-array entry and
// the k-th vector of three 8-bit components (byte offset 3 * k) is converted.
void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte3_Indices3()
{
	for (int vec = 0; vec != 3; ++vec)
	{
		const u8 idx = DataReadU8();
		const u8* const entry = cached_arraybases[ARRAY_NORMAL] + (idx * arraystrides[ARRAY_NORMAL]);
		ReadIndirectS8x3(reinterpret_cast<const s8*>(entry + 3 * vec));
	}
}
// Reads three 8-bit indices. The k-th index selects a normal-array entry and
// the k-th vector of three 16-bit components (byte offset 6 * k) is converted.
void LOADERDECL VertexLoader_Normal::Normal_Index8_Short3_Indices3()
{
	for (int vec = 0; vec != 3; ++vec)
	{
		const u8 idx = DataReadU8();
		const u8* const entry = cached_arraybases[ARRAY_NORMAL] + (idx * arraystrides[ARRAY_NORMAL]);
		ReadIndirectS16x3(reinterpret_cast<const u16*>(entry + 6 * vec));
	}
}
// Reads three 8-bit indices. The k-th index selects a normal-array entry and
// the k-th vector of three 32-bit floats (byte offset 12 * k) is copied.
void LOADERDECL VertexLoader_Normal::Normal_Index8_Float3_Indices3()
{
	for (int vec = 0; vec != 3; ++vec)
	{
		const u8 idx = DataReadU8();
		const u8* const entry = cached_arraybases[ARRAY_NORMAL] + (idx * arraystrides[ARRAY_NORMAL]);
		ReadIndirectFloatx3(reinterpret_cast<const u32*>(entry + 12 * vec));
	}
}
// --- Index16 ---
// Reads a 16-bit index from the vertex stream and converts the three 8-bit
// components of the normal-array entry it selects.
void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte()
{
	const u16 idx = DataReadU16();
	const u8* const entry = cached_arraybases[ARRAY_NORMAL] + (idx * arraystrides[ARRAY_NORMAL]);
	ReadIndirectS8x3(reinterpret_cast<const s8*>(entry));
}
// Reads a 16-bit index from the vertex stream and converts the three 16-bit
// components of the normal-array entry it selects.
void LOADERDECL VertexLoader_Normal::Normal_Index16_Short()
{
	const u16 idx = DataReadU16();
	const u8* const entry = cached_arraybases[ARRAY_NORMAL] + (idx * arraystrides[ARRAY_NORMAL]);
	ReadIndirectS16x3(reinterpret_cast<const u16*>(entry));
}
// Reads a 16-bit index from the vertex stream and copies the three 32-bit
// float components of the normal-array entry it selects.
void LOADERDECL VertexLoader_Normal::Normal_Index16_Float()
{
	const u16 idx = DataReadU16();
	const u8* const entry = cached_arraybases[ARRAY_NORMAL] + (idx * arraystrides[ARRAY_NORMAL]);
	ReadIndirectFloatx3(reinterpret_cast<const u32*>(entry));
}
// Reads a single 16-bit index and converts all nine 8-bit components
// (three vectors) of the normal-array entry it selects.
void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte3_Indices1()
{
	const u16 idx = DataReadU16();
	const u8* const entry = cached_arraybases[ARRAY_NORMAL] + (idx * arraystrides[ARRAY_NORMAL]);
	ReadIndirectS8x9(reinterpret_cast<const s8*>(entry));
}
// Reads a single 16-bit index and converts all nine 16-bit components
// (three vectors) of the normal-array entry it selects.
void LOADERDECL VertexLoader_Normal::Normal_Index16_Short3_Indices1()
{
	const u16 idx = DataReadU16();
	const u8* const entry = cached_arraybases[ARRAY_NORMAL] + (idx * arraystrides[ARRAY_NORMAL]);
	ReadIndirectS16x9(reinterpret_cast<const u16*>(entry));
}
// Reads a single 16-bit index and copies all nine 32-bit float components
// (three vectors) of the normal-array entry it selects.
void LOADERDECL VertexLoader_Normal::Normal_Index16_Float3_Indices1()
{
	const u16 idx = DataReadU16();
	const u8* const entry = cached_arraybases[ARRAY_NORMAL] + (idx * arraystrides[ARRAY_NORMAL]);
	ReadIndirectFloatx9(reinterpret_cast<const u32*>(entry));
}
// Reads three 16-bit indices. The k-th index selects a normal-array entry and
// the k-th vector of three 8-bit components (byte offset 3 * k) is converted.
void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte3_Indices3()
{
	for (int vec = 0; vec != 3; ++vec)
	{
		const u16 idx = DataReadU16();
		const u8* const entry = cached_arraybases[ARRAY_NORMAL] + (idx * arraystrides[ARRAY_NORMAL]);
		ReadIndirectS8x3(reinterpret_cast<const s8*>(entry + 3 * vec));
	}
}
// Reads three 16-bit indices. The k-th index selects a normal-array entry and
// the k-th vector of three 16-bit components (byte offset 6 * k) is converted.
void LOADERDECL VertexLoader_Normal::Normal_Index16_Short3_Indices3()
{
	for (int vec = 0; vec != 3; ++vec)
	{
		const u16 idx = DataReadU16();
		const u8* const entry = cached_arraybases[ARRAY_NORMAL] + (idx * arraystrides[ARRAY_NORMAL]);
		ReadIndirectS16x3(reinterpret_cast<const u16*>(entry + 6 * vec));
	}
}
// Reads three 16-bit indices. The k-th index selects a normal-array entry and
// the k-th vector of three 32-bit floats (byte offset 12 * k) is copied.
void LOADERDECL VertexLoader_Normal::Normal_Index16_Float3_Indices3()
{
	for (int vec = 0; vec != 3; ++vec)
	{
		const u16 idx = DataReadU16();
		const u8* const entry = cached_arraybases[ARRAY_NORMAL] + (idx * arraystrides[ARRAY_NORMAL]);
		ReadIndirectFloatx3(reinterpret_cast<const u32*>(entry + 12 * vec));
	}
}

View File

@ -70,45 +70,20 @@ private:
NUM_NRM_INDICES NUM_NRM_INDICES
}; };
struct Set { struct Set
Set() {} {
Set(int gc_size_, TPipelineFunction function_) : gc_size(gc_size_), function(function_) {} template <typename T>
void operator=(const T&)
{
gc_size = T::size;
function = T::function;
}
int gc_size; int gc_size;
TPipelineFunction function; TPipelineFunction function;
// int pc_size;
}; };
static Set m_Table[NUM_NRM_TYPE][NUM_NRM_INDICES][NUM_NRM_ELEMENTS][NUM_NRM_FORMAT]; static Set m_Table[NUM_NRM_TYPE][NUM_NRM_INDICES][NUM_NRM_ELEMENTS][NUM_NRM_FORMAT];
// direct
static void LOADERDECL Normal_DirectByte();
static void LOADERDECL Normal_DirectShort();
static void LOADERDECL Normal_DirectFloat();
static void LOADERDECL Normal_DirectByte3();
static void LOADERDECL Normal_DirectShort3();
static void LOADERDECL Normal_DirectFloat3();
// index8
static void LOADERDECL Normal_Index8_Byte();
static void LOADERDECL Normal_Index8_Short();
static void LOADERDECL Normal_Index8_Float();
static void LOADERDECL Normal_Index8_Byte3_Indices1();
static void LOADERDECL Normal_Index8_Short3_Indices1();
static void LOADERDECL Normal_Index8_Float3_Indices1();
static void LOADERDECL Normal_Index8_Byte3_Indices3();
static void LOADERDECL Normal_Index8_Short3_Indices3();
static void LOADERDECL Normal_Index8_Float3_Indices3();
// index16
static void LOADERDECL Normal_Index16_Byte();
static void LOADERDECL Normal_Index16_Short();
static void LOADERDECL Normal_Index16_Float();
static void LOADERDECL Normal_Index16_Byte3_Indices1();
static void LOADERDECL Normal_Index16_Short3_Indices1();
static void LOADERDECL Normal_Index16_Float3_Indices1();
static void LOADERDECL Normal_Index16_Byte3_Indices3();
static void LOADERDECL Normal_Index16_Short3_Indices3();
static void LOADERDECL Normal_Index16_Float3_Indices3();
}; };
#endif #endif

View File

@ -15,6 +15,8 @@
// Official SVN repository and contact information can be found at // Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/ // http://code.google.com/p/dolphin-emu/
#include <limits>
#include "Common.h" #include "Common.h"
#include "VideoCommon.h" #include "VideoCommon.h"
#include "VertexLoader.h" #include "VertexLoader.h"
@ -71,101 +73,42 @@ MOVUPS(MOffset(EDI, 0), XMM0);
*/ */
// ============================================================================== template <typename T>
// Direct float PosScale(T val)
// ==============================================================================
template <class T, bool three>
void Pos_ReadDirect()
{ {
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(T)DataRead<T>() * posScale; return val * posScale;
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(T)DataRead<T>() * posScale; }
if (three)
((float*)VertexManager::s_pCurBufferPointer)[2] = (float)(T)DataRead<T>() * posScale; template <>
else float PosScale(float val)
((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f; { return val; }
template <typename T, int N>
void LOADERDECL Pos_ReadDirect()
{
static_assert(N <= 3, "N > 3 is not sane!");
for (int i = 0; i < 3; ++i)
DataWrite(i<N ? PosScale(DataRead<T>()) : 0.f);
LOG_VTX(); LOG_VTX();
VertexManager::s_pCurBufferPointer += 12;
} }
void LOADERDECL Pos_ReadDirect_UByte3() { Pos_ReadDirect<u8, true>(); } template <typename I, typename T, int N>
void LOADERDECL Pos_ReadDirect_Byte3() { Pos_ReadDirect<s8, true>(); } void LOADERDECL Pos_ReadIndex()
void LOADERDECL Pos_ReadDirect_UShort3() { Pos_ReadDirect<u16, true>(); }
void LOADERDECL Pos_ReadDirect_Short3() { Pos_ReadDirect<s16, true>(); }
void LOADERDECL Pos_ReadDirect_UByte2() { Pos_ReadDirect<u8, false>(); }
void LOADERDECL Pos_ReadDirect_Byte2() { Pos_ReadDirect<s8, false>(); }
void LOADERDECL Pos_ReadDirect_UShort2() { Pos_ReadDirect<u16, false>(); }
void LOADERDECL Pos_ReadDirect_Short2() { Pos_ReadDirect<s16, false>(); }
void LOADERDECL Pos_ReadDirect_Float3()
{ {
// No need to use floating point here. static_assert(!std::numeric_limits<I>::is_signed, "Only unsigned I is sane!");
((u32 *)VertexManager::s_pCurBufferPointer)[0] = DataReadU32(); static_assert(N <= 3, "N > 3 is not sane!");
((u32 *)VertexManager::s_pCurBufferPointer)[1] = DataReadU32();
((u32 *)VertexManager::s_pCurBufferPointer)[2] = DataReadU32();
LOG_VTX();
VertexManager::s_pCurBufferPointer += 12;
}
void LOADERDECL Pos_ReadDirect_Float2() auto const index = DataRead<I>();
{ if (index < std::numeric_limits<I>::max())
// No need to use floating point here.
((u32 *)VertexManager::s_pCurBufferPointer)[0] = DataReadU32();
((u32 *)VertexManager::s_pCurBufferPointer)[1] = DataReadU32();
((u32 *)VertexManager::s_pCurBufferPointer)[2] = 0;
LOG_VTX();
VertexManager::s_pCurBufferPointer += 12;
}
template<class T, bool three,int MaxSize>
inline void Pos_ReadIndex_Byte(int Index)
{
if(Index < MaxSize)
{ {
const u8* pData = cached_arraybases[ARRAY_POSITION] + ((u32)Index * arraystrides[ARRAY_POSITION]); auto const data = reinterpret_cast<const T*>(cached_arraybases[ARRAY_POSITION] + (index * arraystrides[ARRAY_POSITION]));
((float*)VertexManager::s_pCurBufferPointer)[0] = ((float)(T)(pData[0])) * posScale;
((float*)VertexManager::s_pCurBufferPointer)[1] = ((float)(T)(pData[1])) * posScale;
if (three)
((float*)VertexManager::s_pCurBufferPointer)[2] = ((float)(T)(pData[2])) * posScale;
else
((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f;
LOG_VTX();
VertexManager::s_pCurBufferPointer += 12;
}
}
template<class T, bool three,int MaxSize> for (int i = 0; i < 3; ++i)
inline void Pos_ReadIndex_Short(int Index) DataWrite(i<N ? PosScale(Common::FromBigEndian(data[i])) : 0.f);
{
if(Index < MaxSize)
{
const u16* pData = (const u16 *)(cached_arraybases[ARRAY_POSITION] + ((u32)Index * arraystrides[ARRAY_POSITION]));
((float*)VertexManager::s_pCurBufferPointer)[0] = ((float)(T)Common::swap16(pData[0])) * posScale;
((float*)VertexManager::s_pCurBufferPointer)[1] = ((float)(T)Common::swap16(pData[1])) * posScale;
if (three)
((float*)VertexManager::s_pCurBufferPointer)[2] = ((float)(T)Common::swap16(pData[2])) * posScale;
else
((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f;
LOG_VTX();
VertexManager::s_pCurBufferPointer += 12;
}
}
template<bool three,int MaxSize>
void Pos_ReadIndex_Float(int Index)
{
if(Index < MaxSize)
{
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (Index * arraystrides[ARRAY_POSITION]));
((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]);
((u32*)VertexManager::s_pCurBufferPointer)[1] = Common::swap32(pData[1]);
if (three)
((u32*)VertexManager::s_pCurBufferPointer)[2] = Common::swap32(pData[2]);
else
((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f;
LOG_VTX(); LOG_VTX();
VertexManager::s_pCurBufferPointer += 12;
} }
} }
@ -173,87 +116,22 @@ void Pos_ReadIndex_Float(int Index)
static const __m128i kMaskSwap32_3 = _mm_set_epi32(0xFFFFFFFFL, 0x08090A0BL, 0x04050607L, 0x00010203L); static const __m128i kMaskSwap32_3 = _mm_set_epi32(0xFFFFFFFFL, 0x08090A0BL, 0x04050607L, 0x00010203L);
static const __m128i kMaskSwap32_2 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x04050607L, 0x00010203L); static const __m128i kMaskSwap32_2 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x04050607L, 0x00010203L);
template<bool three,int MaxSize> template <typename I, bool three>
void Pos_ReadIndex_Float_SSSE3(int Index) void LOADERDECL Pos_ReadIndex_Float_SSSE3()
{ {
if(Index < MaxSize) auto const index = DataRead<I>();
if (index < std::numeric_limits<I>::max())
{ {
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (Index * arraystrides[ARRAY_POSITION])); const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (index * arraystrides[ARRAY_POSITION]));
GC_ALIGNED128(const __m128i a = _mm_loadu_si128((__m128i*)pData)); GC_ALIGNED128(const __m128i a = _mm_loadu_si128((__m128i*)pData));
GC_ALIGNED128(__m128i b = _mm_shuffle_epi8(a, three ? kMaskSwap32_3 : kMaskSwap32_2)); GC_ALIGNED128(__m128i b = _mm_shuffle_epi8(a, three ? kMaskSwap32_3 : kMaskSwap32_2));
_mm_storeu_si128((__m128i*)VertexManager::s_pCurBufferPointer, b); _mm_storeu_si128((__m128i*)VertexManager::s_pCurBufferPointer, b);
VertexManager::s_pCurBufferPointer += sizeof(float) * 3;
LOG_VTX(); LOG_VTX();
VertexManager::s_pCurBufferPointer += 12;
} }
} }
#endif #endif
// Explicitly instantiate these functions to decrease the possibility of
// symbol binding problems when (only) calling them from JIT compiled code.
template void Pos_ReadDirect<u8, true>();
template void Pos_ReadDirect<s8, true>();
template void Pos_ReadDirect<u16, true>();
template void Pos_ReadDirect<s16, true>();
template void Pos_ReadDirect<u8, false>();
template void Pos_ReadDirect<s8, false>();
template void Pos_ReadDirect<u16, false>();
template void Pos_ReadDirect<s16, false>();
template void Pos_ReadIndex_Byte<u8, true, 255>(int Index);
template void Pos_ReadIndex_Byte<s8, true, 255>(int Index);
template void Pos_ReadIndex_Short<u16, true, 255>(int Index);
template void Pos_ReadIndex_Short<s16, true, 255>(int Index);
template void Pos_ReadIndex_Float<true, 255>(int Index);
template void Pos_ReadIndex_Byte<u8, false, 255>(int Index);
template void Pos_ReadIndex_Byte<s8, false, 255>(int Index);
template void Pos_ReadIndex_Short<u16, false, 255>(int Index);
template void Pos_ReadIndex_Short<s16, false, 255>(int Index);
template void Pos_ReadIndex_Float<false, 255>(int Index);
template void Pos_ReadIndex_Byte<u8, true, 65535>(int Index);
template void Pos_ReadIndex_Byte<s8, true, 65535>(int Index);
template void Pos_ReadIndex_Short<u16, true, 65535>(int Index);
template void Pos_ReadIndex_Short<s16, true, 65535>(int Index);
template void Pos_ReadIndex_Float<true, 65535>(int Index);
template void Pos_ReadIndex_Byte<u8, false, 65535>(int Index);
template void Pos_ReadIndex_Byte<s8, false, 65535>(int Index);
template void Pos_ReadIndex_Short<u16, false, 65535>(int Index);
template void Pos_ReadIndex_Short<s16, false, 65535>(int Index);
template void Pos_ReadIndex_Float<false, 65535>(int Index);
// ==============================================================================
// Index 8
// ==============================================================================
void LOADERDECL Pos_ReadIndex8_UByte3() {Pos_ReadIndex_Byte<u8, true, 255> (DataReadU8());}
void LOADERDECL Pos_ReadIndex8_Byte3() {Pos_ReadIndex_Byte<s8, true, 255> (DataReadU8());}
void LOADERDECL Pos_ReadIndex8_UShort3() {Pos_ReadIndex_Short<u16, true, 255> (DataReadU8());}
void LOADERDECL Pos_ReadIndex8_Short3() {Pos_ReadIndex_Short<s16, true, 255> (DataReadU8());}
void LOADERDECL Pos_ReadIndex8_Float3() {Pos_ReadIndex_Float<true, 255> (DataReadU8());}
void LOADERDECL Pos_ReadIndex8_UByte2() {Pos_ReadIndex_Byte<u8, false, 255>(DataReadU8());}
void LOADERDECL Pos_ReadIndex8_Byte2() {Pos_ReadIndex_Byte<s8, false, 255>(DataReadU8());}
void LOADERDECL Pos_ReadIndex8_UShort2() {Pos_ReadIndex_Short<u16, false, 255>(DataReadU8());}
void LOADERDECL Pos_ReadIndex8_Short2() {Pos_ReadIndex_Short<s16, false, 255>(DataReadU8());}
void LOADERDECL Pos_ReadIndex8_Float2() {Pos_ReadIndex_Float<false, 255> (DataReadU8());}
// ==============================================================================
// Index 16
// ==============================================================================
void LOADERDECL Pos_ReadIndex16_UByte3() {Pos_ReadIndex_Byte<u8, true, 65535> (DataReadU16());}
void LOADERDECL Pos_ReadIndex16_Byte3() {Pos_ReadIndex_Byte<s8, true, 65535> (DataReadU16());}
void LOADERDECL Pos_ReadIndex16_UShort3() {Pos_ReadIndex_Short<u16, true, 65535> (DataReadU16());}
void LOADERDECL Pos_ReadIndex16_Short3() {Pos_ReadIndex_Short<s16, true, 65535> (DataReadU16());}
void LOADERDECL Pos_ReadIndex16_Float3() {Pos_ReadIndex_Float<true, 65535> (DataReadU16());}
void LOADERDECL Pos_ReadIndex16_UByte2() {Pos_ReadIndex_Byte<u8, false, 65535>(DataReadU16());}
void LOADERDECL Pos_ReadIndex16_Byte2() {Pos_ReadIndex_Byte<s8, false, 65535>(DataReadU16());}
void LOADERDECL Pos_ReadIndex16_UShort2() {Pos_ReadIndex_Short<u16, false, 65535>(DataReadU16());}
void LOADERDECL Pos_ReadIndex16_Short2() {Pos_ReadIndex_Short<s16, false, 65535>(DataReadU16());}
void LOADERDECL Pos_ReadIndex16_Float2() {Pos_ReadIndex_Float<false, 65535> (DataReadU16());}
#if _M_SSE >= 0x301
void LOADERDECL Pos_ReadIndex8_Float3_SSSE3() {Pos_ReadIndex_Float_SSSE3<true, 255> (DataReadU8());}
void LOADERDECL Pos_ReadIndex8_Float2_SSSE3() {Pos_ReadIndex_Float_SSSE3<false, 255> (DataReadU8());}
void LOADERDECL Pos_ReadIndex16_Float3_SSSE3() {Pos_ReadIndex_Float_SSSE3<true, 65535> (DataReadU16());}
void LOADERDECL Pos_ReadIndex16_Float2_SSSE3() {Pos_ReadIndex_Float_SSSE3<false, 65535> (DataReadU16());}
#endif
static TPipelineFunction tableReadPosition[4][8][2] = { static TPipelineFunction tableReadPosition[4][8][2] = {
{ {
{NULL, NULL,}, {NULL, NULL,},
@ -263,56 +141,40 @@ static TPipelineFunction tableReadPosition[4][8][2] = {
{NULL, NULL,}, {NULL, NULL,},
}, },
{ {
{Pos_ReadDirect_UByte2, Pos_ReadDirect_UByte3,}, {Pos_ReadDirect<u8, 2>, Pos_ReadDirect<u8, 3>,},
{Pos_ReadDirect_Byte2, Pos_ReadDirect_Byte3,}, {Pos_ReadDirect<s8, 2>, Pos_ReadDirect<s8, 3>,},
{Pos_ReadDirect_UShort2, Pos_ReadDirect_UShort3,}, {Pos_ReadDirect<u16, 2>, Pos_ReadDirect<u16, 3>,},
{Pos_ReadDirect_Short2, Pos_ReadDirect_Short3,}, {Pos_ReadDirect<s16, 2>, Pos_ReadDirect<s16, 3>,},
{Pos_ReadDirect_Float2, Pos_ReadDirect_Float3,}, {Pos_ReadDirect<float, 2>, Pos_ReadDirect<float, 3>,},
}, },
{ {
{Pos_ReadIndex8_UByte2, Pos_ReadIndex8_UByte3,}, {Pos_ReadIndex<u8, u8, 2>, Pos_ReadIndex<u8, u8, 3>,},
{Pos_ReadIndex8_Byte2, Pos_ReadIndex8_Byte3,}, {Pos_ReadIndex<u8, s8, 2>, Pos_ReadIndex<u8, s8, 3>,},
{Pos_ReadIndex8_UShort2, Pos_ReadIndex8_UShort3,}, {Pos_ReadIndex<u8, u16, 2>, Pos_ReadIndex<u8, u16, 3>,},
{Pos_ReadIndex8_Short2, Pos_ReadIndex8_Short3,}, {Pos_ReadIndex<u8, s16, 2>, Pos_ReadIndex<u8, s16, 3>,},
{Pos_ReadIndex8_Float2, Pos_ReadIndex8_Float3,}, {Pos_ReadIndex<u8, float, 2>, Pos_ReadIndex<u8, float, 3>,},
}, },
{ {
{Pos_ReadIndex16_UByte2, Pos_ReadIndex16_UByte3,}, {Pos_ReadIndex<u16, u8, 2>, Pos_ReadIndex<u16, u8, 3>,},
{Pos_ReadIndex16_Byte2, Pos_ReadIndex16_Byte3,}, {Pos_ReadIndex<u16, s8, 2>, Pos_ReadIndex<u16, s8, 3>,},
{Pos_ReadIndex16_UShort2, Pos_ReadIndex16_UShort3,}, {Pos_ReadIndex<u16, u16, 2>, Pos_ReadIndex<u16, u16, 3>,},
{Pos_ReadIndex16_Short2, Pos_ReadIndex16_Short3,}, {Pos_ReadIndex<u16, s16, 2>, Pos_ReadIndex<u16, s16, 3>,},
{Pos_ReadIndex16_Float2, Pos_ReadIndex16_Float3,}, {Pos_ReadIndex<u16, float, 2>, Pos_ReadIndex<u16, float, 3>,},
}, },
}; };
static int tableReadPositionVertexSize[4][8][2] = { static int tableReadPositionVertexSize[4][8][2] = {
{ {
{0, 0,}, {0, 0,}, {0, 0,}, {0, 0,}, {0, 0,}, {0, 0,},
{0, 0,},
{0, 0,},
{0, 0,},
{0, 0,},
}, },
{ {
{2, 3,}, {2, 3,}, {2, 3,}, {4, 6,}, {4, 6,}, {8, 12,},
{2, 3,},
{4, 6,},
{4, 6,},
{8, 12,},
}, },
{ {
{1, 1,}, {1, 1,}, {1, 1,}, {1, 1,}, {1, 1,}, {1, 1,},
{1, 1,},
{1, 1,},
{1, 1,},
{1, 1,},
}, },
{ {
{2, 2,}, {2, 2,}, {2, 2,}, {2, 2,}, {2, 2,}, {2, 2,},
{2, 2,},
{2, 2,},
{2, 2,},
{2, 2,},
}, },
}; };
@ -322,10 +184,10 @@ void VertexLoader_Position::Init(void) {
#if _M_SSE >= 0x301 #if _M_SSE >= 0x301
if (cpu_info.bSSSE3) { if (cpu_info.bSSSE3) {
tableReadPosition[2][4][0] = Pos_ReadIndex8_Float2_SSSE3; tableReadPosition[2][4][0] = Pos_ReadIndex_Float_SSSE3<u8, false>;
tableReadPosition[2][4][1] = Pos_ReadIndex8_Float3_SSSE3; tableReadPosition[2][4][1] = Pos_ReadIndex_Float_SSSE3<u8, true>;
tableReadPosition[3][4][0] = Pos_ReadIndex16_Float2_SSSE3; tableReadPosition[3][4][0] = Pos_ReadIndex_Float_SSSE3<u16, false>;
tableReadPosition[3][4][1] = Pos_ReadIndex16_Float3_SSSE3; tableReadPosition[3][4][1] = Pos_ReadIndex_Float_SSSE3<u16, true>;
} }
#endif #endif

View File

@ -28,8 +28,22 @@
#include <tmmintrin.h> #include <tmmintrin.h>
#endif #endif
#define LOG_TEX1() // PRIM_LOG("tex: %f, ", ((float*)VertexManager::s_pCurBufferPointer)[0]); template <int N>
#define LOG_TEX2() // PRIM_LOG("tex: %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[0], ((float*)VertexManager::s_pCurBufferPointer)[1]); void LOG_TEX();
template <>
__forceinline void LOG_TEX<1>()
{
// warning: mapping buffer should be disabled to use this
// PRIM_LOG("tex: %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-1]);
}
template <>
__forceinline void LOG_TEX<2>()
{
// warning: mapping buffer should be disabled to use this
// PRIM_LOG("tex: %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-2], ((float*)VertexManager::s_pCurBufferPointer)[-1]);
}
extern int tcIndex; extern int tcIndex;
extern float tcScale[8]; extern float tcScale[8];
@ -39,279 +53,54 @@ void LOADERDECL TexCoord_Read_Dummy()
tcIndex++; tcIndex++;
} }
void LOADERDECL TexCoord_ReadDirect_UByte1() template <typename T>
float TCScale(T val)
{ {
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)DataReadU8() * tcScale[tcIndex]; return val * tcScale[tcIndex];
LOG_TEX1();
VertexManager::s_pCurBufferPointer += 4;
tcIndex++;
}
void LOADERDECL TexCoord_ReadDirect_UByte2()
{
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)DataReadU8() * tcScale[tcIndex];
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)DataReadU8() * tcScale[tcIndex];
LOG_TEX2();
VertexManager::s_pCurBufferPointer += 8;
tcIndex++;
} }
void LOADERDECL TexCoord_ReadDirect_Byte1() template <>
float TCScale(float val)
{ return val; }
template <typename T, int N>
void LOADERDECL TexCoord_ReadDirect()
{ {
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)DataReadU8() * tcScale[tcIndex]; for (int i = 0; i != N; ++i)
LOG_TEX1(); DataWrite(TCScale(DataRead<T>()));
VertexManager::s_pCurBufferPointer += 4;
tcIndex++; LOG_TEX<N>();
}
void LOADERDECL TexCoord_ReadDirect_Byte2() ++tcIndex;
{
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)DataReadU8() * tcScale[tcIndex];
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s8)DataReadU8() * tcScale[tcIndex];
LOG_TEX2();
VertexManager::s_pCurBufferPointer += 8;
tcIndex++;
} }
void LOADERDECL TexCoord_ReadDirect_UShort1() template <typename I, typename T, int N>
void LOADERDECL TexCoord_ReadIndex()
{ {
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)DataReadU16() * tcScale[tcIndex]; static_assert(!std::numeric_limits<I>::is_signed, "Only unsigned I is sane!");
LOG_TEX1();
VertexManager::s_pCurBufferPointer += 4;
tcIndex++;
}
void LOADERDECL TexCoord_ReadDirect_UShort2()
{
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)DataReadU16() * tcScale[tcIndex];
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)DataReadU16() * tcScale[tcIndex];
LOG_TEX2();
VertexManager::s_pCurBufferPointer += 8;
tcIndex++;
}
void LOADERDECL TexCoord_ReadDirect_Short1() auto const index = DataRead<I>();
{ auto const data = reinterpret_cast<const T*>(cached_arraybases[ARRAY_TEXCOORD0 + tcIndex]
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)DataReadU16() * tcScale[tcIndex]; + (index * arraystrides[ARRAY_TEXCOORD0 + tcIndex]));
LOG_TEX1();
VertexManager::s_pCurBufferPointer += 4;
tcIndex++;
}
void LOADERDECL TexCoord_ReadDirect_Short2()
{
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)DataReadU16() * tcScale[tcIndex];
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s16)DataReadU16() * tcScale[tcIndex];
LOG_TEX2();
VertexManager::s_pCurBufferPointer += 8;
tcIndex++;
}
void LOADERDECL TexCoord_ReadDirect_Float1() for (int i = 0; i != N; ++i)
{ DataWrite(TCScale(Common::FromBigEndian(data[i])));
((u32*)VertexManager::s_pCurBufferPointer)[0] = DataReadU32();
LOG_TEX1();
VertexManager::s_pCurBufferPointer += 4;
tcIndex++;
}
void LOADERDECL TexCoord_ReadDirect_Float2()
{
((u32*)VertexManager::s_pCurBufferPointer)[0] = DataReadU32();
((u32*)VertexManager::s_pCurBufferPointer)[1] = DataReadU32();
LOG_TEX2();
VertexManager::s_pCurBufferPointer += 8;
tcIndex++;
}
// ================================================================================== LOG_TEX<N>();
void LOADERDECL TexCoord_ReadIndex8_UByte1() ++tcIndex;
{
u8 Index = DataReadU8();
const u8 *pData = cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(*pData) * tcScale[tcIndex];
LOG_TEX1();
VertexManager::s_pCurBufferPointer += 4;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex8_UByte2()
{
u8 Index = DataReadU8();
const u8 *pData = cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(u8)(pData[0]) * tcScale[tcIndex];
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(u8)(pData[1]) * tcScale[tcIndex];
LOG_TEX2();
VertexManager::s_pCurBufferPointer += 8;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex8_Byte1()
{
u8 Index = DataReadU8();
const u8 *pData = cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)(*pData) * tcScale[tcIndex];
LOG_TEX1();
VertexManager::s_pCurBufferPointer += 4;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex8_Byte2()
{
u8 Index = DataReadU8();
const u8 *pData = cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)(pData[0]) * tcScale[tcIndex];
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s8)(pData[1]) * tcScale[tcIndex];
LOG_TEX2();
VertexManager::s_pCurBufferPointer += 8;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex8_UShort1()
{
u8 Index = DataReadU8();
const u16 *pData = (const u16 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(u16)Common::swap16(*pData) * tcScale[tcIndex];
LOG_TEX1();
VertexManager::s_pCurBufferPointer += 4;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex8_UShort2()
{
u8 Index = DataReadU8();
const u16 *pData = (const u16 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(u16)Common::swap16(pData[0]) * tcScale[tcIndex];
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(u16)Common::swap16(pData[1]) * tcScale[tcIndex];
LOG_TEX2();
VertexManager::s_pCurBufferPointer += 8;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex8_Short1()
{
u8 Index = DataReadU8();
const u16 *pData = (const u16 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)Common::swap16(pData[0]) * tcScale[tcIndex];
LOG_TEX1();
VertexManager::s_pCurBufferPointer += 4;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex8_Short2()
{
u8 Index = DataReadU8();
const u16 *pData = (const u16 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)Common::swap16(pData[0]) * tcScale[tcIndex];
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s16)Common::swap16(pData[1]) * tcScale[tcIndex];
LOG_TEX2();
VertexManager::s_pCurBufferPointer += 8;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex8_Float1()
{
u16 Index = DataReadU8();
const u32 *pData = (const u32 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]);
LOG_TEX1();
VertexManager::s_pCurBufferPointer += 4;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex8_Float2()
{
u16 Index = DataReadU8();
const u32 *pData = (const u32 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]);
((u32*)VertexManager::s_pCurBufferPointer)[1] = Common::swap32(pData[1]);
LOG_TEX2();
VertexManager::s_pCurBufferPointer += 8;
tcIndex++;
}
// ==================================================================================
void LOADERDECL TexCoord_ReadIndex16_UByte1()
{
u16 Index = DataReadU16();
const u8 *pData = cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(u8)(pData[0]) * tcScale[tcIndex];
LOG_TEX1();
VertexManager::s_pCurBufferPointer += 4;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex16_UByte2()
{
u16 Index = DataReadU16();
const u8 *pData = cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(u8)(pData[0]) * tcScale[tcIndex];
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(u8)(pData[1]) * tcScale[tcIndex];
LOG_TEX2();
VertexManager::s_pCurBufferPointer += 8;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex16_Byte1()
{
u16 Index = DataReadU16();
const u8 *pData = cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)(pData[0]) * tcScale[tcIndex];
LOG_TEX1();
VertexManager::s_pCurBufferPointer += 4;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex16_Byte2()
{
u16 Index = DataReadU16();
const u8 *pData = cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)(pData[0]) * tcScale[tcIndex];
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s8)(pData[1]) * tcScale[tcIndex];
LOG_TEX2();
VertexManager::s_pCurBufferPointer += 8;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex16_UShort1()
{
u16 Index = DataReadU16();
const u16* pData = (const u16 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(u16)Common::swap16(pData[0]) * tcScale[tcIndex];
LOG_TEX1();
VertexManager::s_pCurBufferPointer += 4;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex16_UShort2()
{
u16 Index = DataReadU16();
const u16* pData = (const u16 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(u16)Common::swap16(pData[0]) * tcScale[tcIndex];
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(u16)Common::swap16(pData[1]) * tcScale[tcIndex];
LOG_TEX2();
VertexManager::s_pCurBufferPointer += 8;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex16_Short1()
{
u16 Index = DataReadU16();
const u16 *pData = (const u16 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)Common::swap16(*pData) * tcScale[tcIndex];
LOG_TEX1();
VertexManager::s_pCurBufferPointer += 4;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex16_Short2()
{
// Heavy in ZWW
u16 Index = DataReadU16();
const u16 *pData = (const u16 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)Common::swap16(pData[0]) * tcScale[tcIndex];
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s16)Common::swap16(pData[1]) * tcScale[tcIndex];
LOG_TEX2();
VertexManager::s_pCurBufferPointer += 8;
tcIndex++;
} }
#if _M_SSE >= 0x401 #if _M_SSE >= 0x401
static const __m128i kMaskSwap16_2 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFFFFL, 0x02030001L); static const __m128i kMaskSwap16_2 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFFFFL, 0x02030001L);
void LOADERDECL TexCoord_ReadIndex16_Short2_SSE4() template <typename I>
void LOADERDECL TexCoord_ReadIndex_Short2_SSE4()
{ {
static_assert(!std::numeric_limits<I>::is_signed, "Only unsigned I is sane!");
// Heavy in ZWW // Heavy in ZWW
u16 Index = DataReadU16(); auto const index = DataRead<I>();
const s32 *pData = (const s32*)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex])); const s32 *pData = (const s32*)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
const __m128i a = _mm_cvtsi32_si128(*pData); const __m128i a = _mm_cvtsi32_si128(*pData);
const __m128i b = _mm_shuffle_epi8(a, kMaskSwap16_2); const __m128i b = _mm_shuffle_epi8(a, kMaskSwap16_2);
const __m128i c = _mm_cvtepi16_epi32(b); const __m128i c = _mm_cvtepi16_epi32(b);
@ -319,47 +108,27 @@ void LOADERDECL TexCoord_ReadIndex16_Short2_SSE4()
const __m128 e = _mm_load1_ps(&tcScale[tcIndex]); const __m128 e = _mm_load1_ps(&tcScale[tcIndex]);
const __m128 f = _mm_mul_ps(d, e); const __m128 f = _mm_mul_ps(d, e);
_mm_storeu_ps((float*)VertexManager::s_pCurBufferPointer, f); _mm_storeu_ps((float*)VertexManager::s_pCurBufferPointer, f);
LOG_TEX2(); VertexManager::s_pCurBufferPointer += sizeof(float) * 2;
VertexManager::s_pCurBufferPointer += 8; LOG_TEX<2>();
tcIndex++; tcIndex++;
} }
#endif #endif
void LOADERDECL TexCoord_ReadIndex16_Float1()
{
u16 Index = DataReadU16();
const u32 *pData = (const u32 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]);
LOG_TEX1();
VertexManager::s_pCurBufferPointer += 4;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex16_Float2()
{
u16 Index = DataReadU16();
const u32 *pData = (const u32 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]);
((u32*)VertexManager::s_pCurBufferPointer)[1] = Common::swap32(pData[1]);
LOG_TEX2();
VertexManager::s_pCurBufferPointer += 8;
tcIndex++;
}
#if _M_SSE >= 0x301 #if _M_SSE >= 0x301
static const __m128i kMaskSwap32 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x04050607L, 0x00010203L); static const __m128i kMaskSwap32 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x04050607L, 0x00010203L);
void LOADERDECL TexCoord_ReadIndex16_Float2_SSSE3() template <typename I>
void LOADERDECL TexCoord_ReadIndex_Float2_SSSE3()
{ {
u16 Index = DataReadU16(); static_assert(!std::numeric_limits<I>::is_signed, "Only unsigned I is sane!");
const u32 *pData = (const u32 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
auto const index = DataRead<I>();
const u32 *pData = (const u32 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
GC_ALIGNED128(const __m128i a = _mm_loadl_epi64((__m128i*)pData)); GC_ALIGNED128(const __m128i a = _mm_loadl_epi64((__m128i*)pData));
GC_ALIGNED128(const __m128i b = _mm_shuffle_epi8(a, kMaskSwap32)); GC_ALIGNED128(const __m128i b = _mm_shuffle_epi8(a, kMaskSwap32));
u8* p = VertexManager::s_pCurBufferPointer; _mm_storel_epi64((__m128i*)VertexManager::s_pCurBufferPointer, b);
_mm_storel_epi64((__m128i*)p, b); VertexManager::s_pCurBufferPointer += sizeof(float) * 2;
LOG_TEX2(); LOG_TEX<2>();
p += 8;
VertexManager::s_pCurBufferPointer = p;
tcIndex++; tcIndex++;
} }
#endif #endif
@ -373,56 +142,40 @@ static TPipelineFunction tableReadTexCoord[4][8][2] = {
{NULL, NULL,}, {NULL, NULL,},
}, },
{ {
{TexCoord_ReadDirect_UByte1, TexCoord_ReadDirect_UByte2,}, {TexCoord_ReadDirect<u8, 1>, TexCoord_ReadDirect<u8, 2>,},
{TexCoord_ReadDirect_Byte1, TexCoord_ReadDirect_Byte2,}, {TexCoord_ReadDirect<s8, 1>, TexCoord_ReadDirect<s8, 2>,},
{TexCoord_ReadDirect_UShort1, TexCoord_ReadDirect_UShort2,}, {TexCoord_ReadDirect<u16, 1>, TexCoord_ReadDirect<u16, 2>,},
{TexCoord_ReadDirect_Short1, TexCoord_ReadDirect_Short2,}, {TexCoord_ReadDirect<s16, 1>, TexCoord_ReadDirect<s16, 2>,},
{TexCoord_ReadDirect_Float1, TexCoord_ReadDirect_Float2,}, {TexCoord_ReadDirect<float, 1>, TexCoord_ReadDirect<float, 2>,},
}, },
{ {
{TexCoord_ReadIndex8_UByte1, TexCoord_ReadIndex8_UByte2,}, {TexCoord_ReadIndex<u8, u8, 1>, TexCoord_ReadIndex<u8, u8, 2>,},
{TexCoord_ReadIndex8_Byte1, TexCoord_ReadIndex8_Byte2,}, {TexCoord_ReadIndex<u8, s8, 1>, TexCoord_ReadIndex<u8, s8, 2>,},
{TexCoord_ReadIndex8_UShort1, TexCoord_ReadIndex8_UShort2,}, {TexCoord_ReadIndex<u8, u16, 1>, TexCoord_ReadIndex<u8, u16, 2>,},
{TexCoord_ReadIndex8_Short1, TexCoord_ReadIndex8_Short2,}, {TexCoord_ReadIndex<u8, s16, 1>, TexCoord_ReadIndex<u8, s16, 2>,},
{TexCoord_ReadIndex8_Float1, TexCoord_ReadIndex8_Float2,}, {TexCoord_ReadIndex<u8, float, 1>, TexCoord_ReadIndex<u8, float, 2>,},
}, },
{ {
{TexCoord_ReadIndex16_UByte1, TexCoord_ReadIndex16_UByte2,}, {TexCoord_ReadIndex<u16, u8, 1>, TexCoord_ReadIndex<u16, u8, 2>,},
{TexCoord_ReadIndex16_Byte1, TexCoord_ReadIndex16_Byte2,}, {TexCoord_ReadIndex<u16, s8, 1>, TexCoord_ReadIndex<u16, s8, 2>,},
{TexCoord_ReadIndex16_UShort1, TexCoord_ReadIndex16_UShort2,}, {TexCoord_ReadIndex<u16, u16, 1>, TexCoord_ReadIndex<u16, u16, 2>,},
{TexCoord_ReadIndex16_Short1, TexCoord_ReadIndex16_Short2,}, {TexCoord_ReadIndex<u16, s16, 1>, TexCoord_ReadIndex<u16, s16, 2>,},
{TexCoord_ReadIndex16_Float1, TexCoord_ReadIndex16_Float2,}, {TexCoord_ReadIndex<u16, float, 1>, TexCoord_ReadIndex<u16, float, 2>,},
}, },
}; };
static int tableReadTexCoordVertexSize[4][8][2] = { static int tableReadTexCoordVertexSize[4][8][2] = {
{ {
{0, 0,}, {0, 0,}, {0, 0,}, {0, 0,}, {0, 0,}, {0, 0,},
{0, 0,},
{0, 0,},
{0, 0,},
{0, 0,},
}, },
{ {
{1, 2,}, {1, 2,}, {1, 2,}, {2, 4,}, {2, 4,}, {4, 8,},
{1, 2,},
{2, 4,},
{2, 4,},
{4, 8,},
}, },
{ {
{1, 1,}, {1, 1,}, {1, 1,}, {1, 1,}, {1, 1,}, {1, 1,},
{1, 1,},
{1, 1,},
{1, 1,},
{1, 1,},
}, },
{ {
{2, 2,}, {2, 2,}, {2, 2,}, {2, 2,}, {2, 2,}, {2, 2,},
{2, 2,},
{2, 2,},
{2, 2,},
{2, 2,},
}, },
}; };
@ -430,16 +183,20 @@ void VertexLoader_TextCoord::Init(void) {
#if _M_SSE >= 0x301 #if _M_SSE >= 0x301
if (cpu_info.bSSSE3) { if (cpu_info.bSSSE3)
tableReadTexCoord[3][4][1] = TexCoord_ReadIndex16_Float2_SSSE3; {
tableReadTexCoord[2][4][1] = TexCoord_ReadIndex_Float2_SSSE3<u8>;
tableReadTexCoord[3][4][1] = TexCoord_ReadIndex_Float2_SSSE3<u16>;
} }
#endif #endif
#if _M_SSE >= 0x401 #if _M_SSE >= 0x401
if (cpu_info.bSSE4_1) { if (cpu_info.bSSE4_1)
tableReadTexCoord[3][3][1] = TexCoord_ReadIndex16_Short2_SSE4; {
tableReadTexCoord[2][3][1] = TexCoord_ReadIndex_Short2_SSE4<u8>;
tableReadTexCoord[3][3][1] = TexCoord_ReadIndex_Short2_SSE4<u16>;
} }
#endif #endif

View File

@ -12,171 +12,120 @@
#include "BPStructs.h" #include "BPStructs.h"
#include "VertexManagerBase.h" #include "VertexManagerBase.h"
#include "MainBase.h"
#include "VideoConfig.h" #include "VideoConfig.h"
VertexManager *g_vertex_manager; VertexManager *g_vertex_manager;
u8 *VertexManager::s_pCurBufferPointer; u8 *VertexManager::s_pCurBufferPointer;
u8 *VertexManager::s_pBaseBufferPointer; u8 *VertexManager::s_pBaseBufferPointer;
u8 *VertexManager::s_pEndBufferPointer;
u8 *VertexManager::LocalVBuffer;
u16 *VertexManager::TIBuffer;
u16 *VertexManager::LIBuffer;
u16 *VertexManager::PIBuffer;
bool VertexManager::Flushed;
VertexManager::VertexManager() VertexManager::VertexManager()
{ {
Flushed = false; LocalVBuffer.resize(MAXVBUFFERSIZE);
s_pCurBufferPointer = s_pBaseBufferPointer = &LocalVBuffer[0];
s_pEndBufferPointer = s_pBaseBufferPointer + LocalVBuffer.size();
LocalVBuffer = new u8[MAXVBUFFERSIZE]; TIBuffer.resize(MAXIBUFFERSIZE);
s_pCurBufferPointer = s_pBaseBufferPointer = LocalVBuffer; LIBuffer.resize(MAXIBUFFERSIZE);
PIBuffer.resize(MAXIBUFFERSIZE);
TIBuffer = new u16[MAXIBUFFERSIZE];
LIBuffer = new u16[MAXIBUFFERSIZE];
PIBuffer = new u16[MAXIBUFFERSIZE];
IndexGenerator::Start(TIBuffer, LIBuffer, PIBuffer);
}
void VertexManager::ResetBuffer()
{
s_pCurBufferPointer = LocalVBuffer;
}
VertexManager::~VertexManager()
{
delete[] LocalVBuffer;
delete[] TIBuffer;
delete[] LIBuffer;
delete[] PIBuffer;
// TODO: necessary??
ResetBuffer(); ResetBuffer();
} }
void VertexManager::AddIndices(int primitive, int numVertices) VertexManager::~VertexManager()
{ {}
//switch (primitive)
//{
//case GX_DRAW_QUADS: IndexGenerator::AddQuads(numVertices); break;
//case GX_DRAW_TRIANGLES: IndexGenerator::AddList(numVertices); break;
//case GX_DRAW_TRIANGLE_STRIP: IndexGenerator::AddStrip(numVertices); break;
//case GX_DRAW_TRIANGLE_FAN: IndexGenerator::AddFan(numVertices); break;
//case GX_DRAW_LINES: IndexGenerator::AddLineList(numVertices); break;
//case GX_DRAW_LINE_STRIP: IndexGenerator::AddLineStrip(numVertices); break;
//case GX_DRAW_POINTS: IndexGenerator::AddPoints(numVertices); break;
//}
static void (*const primitive_table[])(int) = void VertexManager::ResetBuffer()
{
s_pCurBufferPointer = s_pBaseBufferPointer;
IndexGenerator::Start(GetTriangleIndexBuffer(), GetLineIndexBuffer(), GetPointIndexBuffer());
}
u32 VertexManager::GetRemainingSize()
{
return (u32)(s_pEndBufferPointer - s_pCurBufferPointer);
}
void VertexManager::PrepareForAdditionalData(int primitive, u32 count, u32 stride)
{
u32 const needed_vertex_bytes = count * stride;
if (needed_vertex_bytes > GetRemainingSize() || count > GetRemainingIndices(primitive))
{ {
IndexGenerator::AddQuads, Flush();
NULL,
IndexGenerator::AddList,
IndexGenerator::AddStrip,
IndexGenerator::AddFan,
IndexGenerator::AddLineList,
IndexGenerator::AddLineStrip,
IndexGenerator::AddPoints,
};
primitive_table[primitive](numVertices); if (needed_vertex_bytes > GetRemainingSize())
ERROR_LOG(VIDEO, "VertexManager: Buffer not large enough for all vertices! "
"Increase MAXVBUFFERSIZE or we need primitive breaking afterall.");
if (count > GetRemainingIndices(primitive))
ERROR_LOG(VIDEO, "VertexManager: Buffer not large enough for all indices! "
"Increase MAXIBUFFERSIZE or we need primitive breaking afterall.");
}
} }
int VertexManager::GetRemainingSize() bool VertexManager::IsFlushed() const
{ {
return MAXVBUFFERSIZE - (int)(s_pCurBufferPointer - LocalVBuffer); return s_pBaseBufferPointer == s_pCurBufferPointer;
} }
int VertexManager::GetRemainingVertices(int primitive) u32 VertexManager::GetRemainingIndices(int primitive)
{ {
switch (primitive) switch (primitive)
{ {
case GX_DRAW_QUADS: case GX_DRAW_QUADS:
return (MAXIBUFFERSIZE - IndexGenerator::GetTriangleindexLen()) / 6 * 4;
case GX_DRAW_TRIANGLES: case GX_DRAW_TRIANGLES:
return (MAXIBUFFERSIZE - IndexGenerator::GetTriangleindexLen());
case GX_DRAW_TRIANGLE_STRIP: case GX_DRAW_TRIANGLE_STRIP:
return (MAXIBUFFERSIZE - IndexGenerator::GetTriangleindexLen()) / 3 + 2;
case GX_DRAW_TRIANGLE_FAN: case GX_DRAW_TRIANGLE_FAN:
return (MAXIBUFFERSIZE - IndexGenerator::GetTriangleindexLen()) / 3; return (MAXIBUFFERSIZE - IndexGenerator::GetTriangleindexLen()) / 3 + 2;
break;
case GX_DRAW_LINES: case GX_DRAW_LINES:
return (MAXIBUFFERSIZE - IndexGenerator::GetLineindexLen());
case GX_DRAW_LINE_STRIP: case GX_DRAW_LINE_STRIP:
return (MAXIBUFFERSIZE - IndexGenerator::GetLineindexLen()) / 2; return (MAXIBUFFERSIZE - IndexGenerator::GetLineindexLen()) / 2 + 1;
break;
case GX_DRAW_POINTS: case GX_DRAW_POINTS:
return (MAXIBUFFERSIZE - IndexGenerator::GetPointindexLen()); return (MAXIBUFFERSIZE - IndexGenerator::GetPointindexLen());
break;
default: default:
return 0; return 0;
break;
} }
} }
void VertexManager::AddVertices(int primitive, int numVertices) void VertexManager::AddVertices(int primitive, u32 numVertices)
{ {
if (numVertices <= 0) if (numVertices <= 0)
return; return;
switch (primitive)
{
case GX_DRAW_QUADS:
case GX_DRAW_TRIANGLES:
case GX_DRAW_TRIANGLE_STRIP:
case GX_DRAW_TRIANGLE_FAN:
if (MAXIBUFFERSIZE - IndexGenerator::GetTriangleindexLen() < 3 * numVertices)
Flush();
break;
case GX_DRAW_LINES:
case GX_DRAW_LINE_STRIP:
if (MAXIBUFFERSIZE - IndexGenerator::GetLineindexLen() < 2 * numVertices)
Flush();
break;
case GX_DRAW_POINTS:
if (MAXIBUFFERSIZE - IndexGenerator::GetPointindexLen() < numVertices)
Flush();
break;
default:
return;
break;
}
if (Flushed)
{
IndexGenerator::Start(TIBuffer, LIBuffer, PIBuffer);
Flushed = false;
}
ADDSTAT(stats.thisFrame.numPrims, numVertices); ADDSTAT(stats.thisFrame.numPrims, numVertices);
INCSTAT(stats.thisFrame.numPrimitiveJoins); INCSTAT(stats.thisFrame.numPrimitiveJoins);
AddIndices(primitive, numVertices);
IndexGenerator::AddIndices(primitive, numVertices);
} }
void VertexManager::Flush() void VertexManager::Flush()
{ {
if (g_vertex_manager->IsFlushed())
return;
// loading a state will invalidate BP, so check for it // loading a state will invalidate BP, so check for it
g_video_backend->CheckInvalidState(); g_video_backend->CheckInvalidState();
VideoFifo_CheckEFBAccess();
g_vertex_manager->vFlush(); g_vertex_manager->vFlush();
g_vertex_manager->ResetBuffer();
} }
// TODO: need to merge more stuff into VideoCommon to use this // TODO: need to merge more stuff into VideoCommon to use this
#if (0) #if (0)
void VertexManager::Flush() void VertexManager::Flush()
{ {
if (LocalVBuffer == s_pCurBufferPointer || Flushed)
return;
Flushed = true;
VideoFifo_CheckEFBAccess();
#if defined(_DEBUG) || defined(DEBUGFAST) #if defined(_DEBUG) || defined(DEBUGFAST)
PRIM_LOG("frame%d:\n texgen=%d, numchan=%d, dualtex=%d, ztex=%d, cole=%d, alpe=%d, ze=%d", g_ActiveConfig.iSaveTargetId, xfregs.numTexGens, PRIM_LOG("frame%d:\n texgen=%d, numchan=%d, dualtex=%d, ztex=%d, cole=%d, alpe=%d, ze=%d", g_ActiveConfig.iSaveTargetId, xfregs.numTexGens,
xfregs.nNumChans, (int)xfregs.bEnableDualTexTransform, bpmem.ztex2.op, xfregs.nNumChans, (int)xfregs.bEnableDualTexTransform, bpmem.ztex2.op,
@ -249,9 +198,9 @@ void VertexManager::Flush()
// finally bind // finally bind
if (false == PixelShaderCache::SetShader(false, g_nativeVertexFmt->m_components)) if (false == PixelShaderCache::SetShader(false, g_nativeVertexFmt->m_components))
goto shader_fail; return;
if (false == VertexShaderCache::SetShader(g_nativeVertexFmt->m_components)) if (false == VertexShaderCache::SetShader(g_nativeVertexFmt->m_components))
goto shader_fail; return;
const int stride = g_nativeVertexFmt->GetVertexStride(); const int stride = g_nativeVertexFmt->GetVertexStride();
//if (g_nativeVertexFmt) //if (g_nativeVertexFmt)
@ -265,7 +214,7 @@ void VertexManager::Flush()
if (false == g_ActiveConfig.bDstAlphaPass && bpmem.dstalpha.enable && bpmem.blendmode.alphaupdate) if (false == g_ActiveConfig.bDstAlphaPass && bpmem.dstalpha.enable && bpmem.blendmode.alphaupdate)
{ {
if (false == PixelShaderCache::SetShader(true, g_nativeVertexFmt->m_components)) if (false == PixelShaderCache::SetShader(true, g_nativeVertexFmt->m_components))
goto shader_fail; return;
g_vertex_manager->Draw(stride, true); g_vertex_manager->Draw(stride, true);
} }
@ -301,9 +250,6 @@ void VertexManager::Flush()
} }
#endif #endif
++g_Config.iSaveTargetId; ++g_Config.iSaveTargetId;
shader_fail:
ResetBuffer();
} }
#endif #endif
@ -314,12 +260,16 @@ void VertexManager::DoState(PointerWrap& p)
void VertexManager::DoStateShared(PointerWrap& p) void VertexManager::DoStateShared(PointerWrap& p)
{ {
p.DoPointer(s_pCurBufferPointer, LocalVBuffer); // It seems we half-assume to be flushed here
p.DoArray(LocalVBuffer, MAXVBUFFERSIZE); // We update s_pCurBufferPointer yet don't worry about IndexGenerator's outdated pointers
p.DoArray(TIBuffer, MAXIBUFFERSIZE); // and maybe other things are overlooked
p.DoArray(LIBuffer, MAXIBUFFERSIZE);
p.DoArray(PIBuffer, MAXIBUFFERSIZE);
if (p.GetMode() == PointerWrap::MODE_READ) p.Do(LocalVBuffer);
Flushed = false; p.Do(TIBuffer);
p.Do(LIBuffer);
p.Do(PIBuffer);
s_pBaseBufferPointer = &LocalVBuffer[0];
s_pEndBufferPointer = s_pBaseBufferPointer + LocalVBuffer.size();
p.DoPointer(s_pCurBufferPointer, s_pBaseBufferPointer);
} }

View File

@ -2,72 +2,70 @@
#ifndef _VERTEXMANAGERBASE_H #ifndef _VERTEXMANAGERBASE_H
#define _VERTEXMANAGERBASE_H #define _VERTEXMANAGERBASE_H
#include <vector>
class NativeVertexFormat; class NativeVertexFormat;
class PointerWrap; class PointerWrap;
class VertexManager class VertexManager
{ {
private:
// What are the actual values?
static const u32 SMALLEST_POSSIBLE_VERTEX = 1;
static const u32 LARGEST_POSSIBLE_VERTEX = 188;
static const u32 MAX_PRIMITIVES_PER_COMMAND = (u16)-1;
public: public:
static const u32 MAXVBUFFERSIZE = MAX_PRIMITIVES_PER_COMMAND * LARGEST_POSSIBLE_VERTEX;
enum // We may convert triangle-fans to triangle-lists, almost 3x as many indices.
{ static const u32 MAXIBUFFERSIZE = MAX_PRIMITIVES_PER_COMMAND * 3;
// values from OGL backend
//MAXVBUFFERSIZE = 0x1FFFF,
//MAXIBUFFERSIZE = 0xFFFF,
// values from DX9 backend
//MAXVBUFFERSIZE = 0x50000,
//MAXIBUFFERSIZE = 0xFFFF,
// values from DX11 backend
MAXVBUFFERSIZE = 0x50000,
MAXIBUFFERSIZE = 0xFFFF,
};
VertexManager(); VertexManager();
virtual ~VertexManager(); // needs to be virtual for DX11's dtor // needs to be virtual for DX11's dtor
virtual ~VertexManager();
static void AddVertices(int _primitive, int _numVertices); static void AddVertices(int _primitive, u32 _numVertices);
// TODO: protected?
static u8 *s_pCurBufferPointer; static u8 *s_pCurBufferPointer;
static u8 *s_pBaseBufferPointer; static u8 *s_pBaseBufferPointer;
static u8 *s_pEndBufferPointer;
static int GetRemainingSize(); static u32 GetRemainingSize();
static int GetRemainingVertices(int primitive); static void PrepareForAdditionalData(int primitive, u32 count, u32 stride);
static u32 GetRemainingIndices(int primitive);
static void Flush(); static void Flush();
virtual ::NativeVertexFormat* CreateNativeVertexFormat() = 0; virtual ::NativeVertexFormat* CreateNativeVertexFormat() = 0;
static u16* GetTriangleIndexBuffer() { return TIBuffer; }
static u16* GetLineIndexBuffer() { return LIBuffer; }
static u16* GetPointIndexBuffer() { return PIBuffer; }
static u8* GetVertexBuffer() { return LocalVBuffer; }
static void DoState(PointerWrap& p); static void DoState(PointerWrap& p);
virtual void CreateDeviceObjects(){}; virtual void CreateDeviceObjects(){};
virtual void DestroyDeviceObjects(){}; virtual void DestroyDeviceObjects(){};
protected: protected:
// TODO: make private after Flush() is merged u16* GetTriangleIndexBuffer() { return &TIBuffer[0]; }
static void ResetBuffer(); u16* GetLineIndexBuffer() { return &LIBuffer[0]; }
u16* GetPointIndexBuffer() { return &PIBuffer[0]; }
static u8 *LocalVBuffer; u8* GetVertexBuffer() { return &s_pBaseBufferPointer[0]; }
static u16 *TIBuffer;
static u16 *LIBuffer;
static u16 *PIBuffer;
static bool Flushed;
virtual void vDoState(PointerWrap& p) { DoStateShared(p); } virtual void vDoState(PointerWrap& p) { DoStateShared(p); }
void DoStateShared(PointerWrap& p); void DoStateShared(PointerWrap& p);
private: private:
static void AddIndices(int primitive, int numVertices); bool IsFlushed() const;
void ResetBuffer();
//virtual void Draw(u32 stride, bool alphapass) = 0; //virtual void Draw(u32 stride, bool alphapass) = 0;
// temp // temp
virtual void vFlush() = 0; virtual void vFlush() = 0;
std::vector<u8> LocalVBuffer;
std::vector<u16> TIBuffer;
std::vector<u16> LIBuffer;
std::vector<u16> PIBuffer;
}; };
extern VertexManager *g_vertex_manager; extern VertexManager *g_vertex_manager;

View File

@ -90,8 +90,8 @@ struct TargetRectangle : public MathUtil::Rectangle<int>
#define PRIM_LOG(...) DEBUG_LOG(VIDEO, ##__VA_ARGS__) #define PRIM_LOG(...) DEBUG_LOG(VIDEO, ##__VA_ARGS__)
#endif #endif
// warning: mapping buffer should be disabled to use this
// #define LOG_VTX() DEBUG_LOG(VIDEO, "vtx: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[0], ((float*)VertexManager::s_pCurBufferPointer)[1], ((float*)VertexManager::s_pCurBufferPointer)[2]); // #define LOG_VTX() DEBUG_LOG(VIDEO, "vtx: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-3], ((float*)VertexManager::s_pCurBufferPointer)[-2], ((float*)VertexManager::s_pCurBufferPointer)[-1]);
#define LOG_VTX() #define LOG_VTX()

View File

@ -550,8 +550,7 @@ void CompileAndRunDisplayList(u32 address, u32 size, CachedDisplayList *dl)
cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7) cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7)
(cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT, (cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT,
numVertices); numVertices);
u8* EndAddress = VertexManager::s_pCurBufferPointer; u32 Vdatasize = (u32)(VertexManager::s_pCurBufferPointer - StartAddress);
u32 Vdatasize = (u32)(EndAddress - StartAddress);
if (Vdatasize > 0) if (Vdatasize > 0)
{ {
// Compile // Compile

View File

@ -104,7 +104,7 @@ void VertexManager::LoadBuffers()
{ {
D3D11_MAPPED_SUBRESOURCE map; D3D11_MAPPED_SUBRESOURCE map;
UINT vSize = UINT(s_pCurBufferPointer - LocalVBuffer); UINT vSize = UINT(s_pCurBufferPointer - s_pBaseBufferPointer);
D3D11_MAP MapType = D3D11_MAP_WRITE_NO_OVERWRITE; D3D11_MAP MapType = D3D11_MAP_WRITE_NO_OVERWRITE;
if (m_vertexBufferCursor + vSize >= VBUFFER_SIZE) if (m_vertexBufferCursor + vSize >= VBUFFER_SIZE)
{ {
@ -116,7 +116,7 @@ void VertexManager::LoadBuffers()
D3D::context->Map(m_vertexBuffers[m_activeVertexBuffer], 0, MapType, 0, &map); D3D::context->Map(m_vertexBuffers[m_activeVertexBuffer], 0, MapType, 0, &map);
memcpy((u8*)map.pData + m_vertexBufferCursor, LocalVBuffer, vSize); memcpy((u8*)map.pData + m_vertexBufferCursor, s_pBaseBufferPointer, vSize);
D3D::context->Unmap(m_vertexBuffers[m_activeVertexBuffer], 0); D3D::context->Unmap(m_vertexBuffers[m_activeVertexBuffer], 0);
m_vertexDrawOffset = m_vertexBufferCursor; m_vertexDrawOffset = m_vertexBufferCursor;
m_vertexBufferCursor += vSize; m_vertexBufferCursor += vSize;
@ -136,9 +136,9 @@ void VertexManager::LoadBuffers()
m_triangleDrawIndex = m_indexBufferCursor; m_triangleDrawIndex = m_indexBufferCursor;
m_lineDrawIndex = m_triangleDrawIndex + IndexGenerator::GetTriangleindexLen(); m_lineDrawIndex = m_triangleDrawIndex + IndexGenerator::GetTriangleindexLen();
m_pointDrawIndex = m_lineDrawIndex + IndexGenerator::GetLineindexLen(); m_pointDrawIndex = m_lineDrawIndex + IndexGenerator::GetLineindexLen();
memcpy((u16*)map.pData + m_triangleDrawIndex, TIBuffer, sizeof(u16) * IndexGenerator::GetTriangleindexLen()); memcpy((u16*)map.pData + m_triangleDrawIndex, GetTriangleIndexBuffer(), sizeof(u16) * IndexGenerator::GetTriangleindexLen());
memcpy((u16*)map.pData + m_lineDrawIndex, LIBuffer, sizeof(u16) * IndexGenerator::GetLineindexLen()); memcpy((u16*)map.pData + m_lineDrawIndex, GetLineIndexBuffer(), sizeof(u16) * IndexGenerator::GetLineindexLen());
memcpy((u16*)map.pData + m_pointDrawIndex, PIBuffer, sizeof(u16) * IndexGenerator::GetPointindexLen()); memcpy((u16*)map.pData + m_pointDrawIndex, GetPointIndexBuffer(), sizeof(u16) * IndexGenerator::GetPointindexLen());
D3D::context->Unmap(m_indexBuffers[m_activeIndexBuffer], 0); D3D::context->Unmap(m_indexBuffers[m_activeIndexBuffer], 0);
m_indexBufferCursor += iCount; m_indexBufferCursor += iCount;
} }
@ -208,13 +208,9 @@ void VertexManager::Draw(UINT stride)
if (IndexGenerator::GetNumLines() > 0 || IndexGenerator::GetNumPoints() > 0) if (IndexGenerator::GetNumLines() > 0 || IndexGenerator::GetNumPoints() > 0)
((DX11::Renderer*)g_renderer)->RestoreCull(); ((DX11::Renderer*)g_renderer)->RestoreCull();
} }
void VertexManager::vFlush() void VertexManager::vFlush()
{ {
if (LocalVBuffer == s_pCurBufferPointer) return;
if (Flushed) return;
Flushed=true;
VideoFifo_CheckEFBAccess();
u32 usedtextures = 0; u32 usedtextures = 0;
for (u32 i = 0; i < (u32)bpmem.genMode.numtevstages + 1; ++i) for (u32 i = 0; i < (u32)bpmem.genMode.numtevstages + 1; ++i)
if (bpmem.tevorders[i / 2].getEnable(i & 1)) if (bpmem.tevorders[i / 2].getEnable(i & 1))
@ -262,12 +258,12 @@ void VertexManager::vFlush()
g_nativeVertexFmt->m_components)) g_nativeVertexFmt->m_components))
{ {
GFX_DEBUGGER_PAUSE_LOG_AT(NEXT_ERROR,true,{printf("Fail to set pixel shader\n");}); GFX_DEBUGGER_PAUSE_LOG_AT(NEXT_ERROR,true,{printf("Fail to set pixel shader\n");});
goto shader_fail; return;
} }
if (!VertexShaderCache::SetShader(g_nativeVertexFmt->m_components)) if (!VertexShaderCache::SetShader(g_nativeVertexFmt->m_components))
{ {
GFX_DEBUGGER_PAUSE_LOG_AT(NEXT_ERROR,true,{printf("Fail to set pixel shader\n");}); GFX_DEBUGGER_PAUSE_LOG_AT(NEXT_ERROR,true,{printf("Fail to set pixel shader\n");});
goto shader_fail; return;
} }
LoadBuffers(); LoadBuffers();
unsigned int stride = g_nativeVertexFmt->GetVertexStride(); unsigned int stride = g_nativeVertexFmt->GetVertexStride();
@ -281,9 +277,6 @@ void VertexManager::vFlush()
GFX_DEBUGGER_PAUSE_AT(NEXT_FLUSH, true); GFX_DEBUGGER_PAUSE_AT(NEXT_FLUSH, true);
g_renderer->RestoreState(); g_renderer->RestoreState();
shader_fail:
ResetBuffer();
} }
} // namespace } // namespace

View File

@ -173,7 +173,7 @@ void VertexManager::PrepareVBuffers(int stride)
DestroyDeviceObjects(); DestroyDeviceObjects();
return; return;
} }
memcpy(pVertices, LocalVBuffer, datasize); memcpy(pVertices, s_pBaseBufferPointer, datasize);
VBuffers[CurrentVBuffer]->Unlock(); VBuffers[CurrentVBuffer]->Unlock();
LockMode = D3DLOCK_NOOVERWRITE; LockMode = D3DLOCK_NOOVERWRITE;
@ -192,17 +192,17 @@ void VertexManager::PrepareVBuffers(int stride)
} }
if(TdataSize) if(TdataSize)
{ {
memcpy(pIndices, TIBuffer, TdataSize * sizeof(u16)); memcpy(pIndices, GetTriangleIndexBuffer(), TdataSize * sizeof(u16));
pIndices += TdataSize; pIndices += TdataSize;
} }
if(LDataSize) if(LDataSize)
{ {
memcpy(pIndices, LIBuffer, LDataSize * sizeof(u16)); memcpy(pIndices, GetLineIndexBuffer(), LDataSize * sizeof(u16));
pIndices += LDataSize; pIndices += LDataSize;
} }
if(PDataSize) if(PDataSize)
{ {
memcpy(pIndices, PIBuffer, PDataSize * sizeof(u16)); memcpy(pIndices, GetPointIndexBuffer(), PDataSize * sizeof(u16));
} }
IBuffers[CurrentIBuffer]->Unlock(); IBuffers[CurrentIBuffer]->Unlock();
D3D::dev->SetStreamSource( 0, VBuffers[CurrentVBuffer], CurrentVBufferIndex, stride); D3D::dev->SetStreamSource( 0, VBuffers[CurrentVBuffer], CurrentVBufferIndex, stride);
@ -266,9 +266,9 @@ void VertexManager::DrawVA(int stride)
if (FAILED(D3D::dev->DrawIndexedPrimitiveUP( if (FAILED(D3D::dev->DrawIndexedPrimitiveUP(
D3DPT_TRIANGLELIST, D3DPT_TRIANGLELIST,
0, IndexGenerator::GetNumVerts(), IndexGenerator::GetNumTriangles(), 0, IndexGenerator::GetNumVerts(), IndexGenerator::GetNumTriangles(),
TIBuffer, GetTriangleIndexBuffer(),
D3DFMT_INDEX16, D3DFMT_INDEX16,
LocalVBuffer, s_pBaseBufferPointer,
stride))) stride)))
{ {
DumpBadShaders(); DumpBadShaders();
@ -280,9 +280,9 @@ void VertexManager::DrawVA(int stride)
if (FAILED(D3D::dev->DrawIndexedPrimitiveUP( if (FAILED(D3D::dev->DrawIndexedPrimitiveUP(
D3DPT_LINELIST, D3DPT_LINELIST,
0, IndexGenerator::GetNumVerts(), IndexGenerator::GetNumLines(), 0, IndexGenerator::GetNumVerts(), IndexGenerator::GetNumLines(),
LIBuffer, GetLineIndexBuffer(),
D3DFMT_INDEX16, D3DFMT_INDEX16,
LocalVBuffer, s_pBaseBufferPointer,
stride))) stride)))
{ {
DumpBadShaders(); DumpBadShaders();
@ -294,9 +294,9 @@ void VertexManager::DrawVA(int stride)
if (FAILED(D3D::dev->DrawIndexedPrimitiveUP( if (FAILED(D3D::dev->DrawIndexedPrimitiveUP(
D3DPT_POINTLIST, D3DPT_POINTLIST,
0, IndexGenerator::GetNumVerts(), IndexGenerator::GetNumPoints(), 0, IndexGenerator::GetNumVerts(), IndexGenerator::GetNumPoints(),
PIBuffer, GetPointIndexBuffer(),
D3DFMT_INDEX16, D3DFMT_INDEX16,
LocalVBuffer, s_pBaseBufferPointer,
stride))) stride)))
{ {
DumpBadShaders(); DumpBadShaders();
@ -307,11 +307,6 @@ void VertexManager::DrawVA(int stride)
void VertexManager::vFlush() void VertexManager::vFlush()
{ {
if (LocalVBuffer == s_pCurBufferPointer) return;
if (Flushed) return;
Flushed = true;
VideoFifo_CheckEFBAccess();
u32 usedtextures = 0; u32 usedtextures = 0;
for (u32 i = 0; i < (u32)bpmem.genMode.numtevstages + 1; ++i) for (u32 i = 0; i < (u32)bpmem.genMode.numtevstages + 1; ++i)
if (bpmem.tevorders[i / 2].getEnable(i & 1)) if (bpmem.tevorders[i / 2].getEnable(i & 1))
@ -388,7 +383,6 @@ shader_fail:
CurrentIBufferIndex += IndexGenerator::GetTriangleindexLen() + IndexGenerator::GetLineindexLen() + IndexGenerator::GetPointindexLen(); CurrentIBufferIndex += IndexGenerator::GetTriangleindexLen() + IndexGenerator::GetLineindexLen() + IndexGenerator::GetPointindexLen();
CurrentVBufferIndex += IndexGenerator::GetNumVerts() * stride; CurrentVBufferIndex += IndexGenerator::GetNumVerts() * stride;
} }
ResetBuffer();
} }
} }

View File

@ -84,27 +84,23 @@ void VertexManager::Draw()
{ {
if (IndexGenerator::GetNumTriangles() > 0) if (IndexGenerator::GetNumTriangles() > 0)
{ {
glDrawElements(GL_TRIANGLES, IndexGenerator::GetTriangleindexLen(), GL_UNSIGNED_SHORT, TIBuffer); glDrawElements(GL_TRIANGLES, IndexGenerator::GetTriangleindexLen(), GL_UNSIGNED_SHORT, GetTriangleIndexBuffer());
INCSTAT(stats.thisFrame.numIndexedDrawCalls); INCSTAT(stats.thisFrame.numIndexedDrawCalls);
} }
if (IndexGenerator::GetNumLines() > 0) if (IndexGenerator::GetNumLines() > 0)
{ {
glDrawElements(GL_LINES, IndexGenerator::GetLineindexLen(), GL_UNSIGNED_SHORT, LIBuffer); glDrawElements(GL_LINES, IndexGenerator::GetLineindexLen(), GL_UNSIGNED_SHORT, GetLineIndexBuffer());
INCSTAT(stats.thisFrame.numIndexedDrawCalls); INCSTAT(stats.thisFrame.numIndexedDrawCalls);
} }
if (IndexGenerator::GetNumPoints() > 0) if (IndexGenerator::GetNumPoints() > 0)
{ {
glDrawElements(GL_POINTS, IndexGenerator::GetPointindexLen(), GL_UNSIGNED_SHORT, PIBuffer); glDrawElements(GL_POINTS, IndexGenerator::GetPointindexLen(), GL_UNSIGNED_SHORT, GetPointIndexBuffer());
INCSTAT(stats.thisFrame.numIndexedDrawCalls); INCSTAT(stats.thisFrame.numIndexedDrawCalls);
} }
} }
void VertexManager::vFlush() void VertexManager::vFlush()
{ {
if (LocalVBuffer == s_pCurBufferPointer) return;
if (Flushed) return;
Flushed=true;
VideoFifo_CheckEFBAccess();
#if defined(_DEBUG) || defined(DEBUGFAST) #if defined(_DEBUG) || defined(DEBUGFAST)
PRIM_LOG("frame%d:\n texgen=%d, numchan=%d, dualtex=%d, ztex=%d, cole=%d, alpe=%d, ze=%d", g_ActiveConfig.iSaveTargetId, xfregs.numTexGen.numTexGens, PRIM_LOG("frame%d:\n texgen=%d, numchan=%d, dualtex=%d, ztex=%d, cole=%d, alpe=%d, ze=%d", g_ActiveConfig.iSaveTargetId, xfregs.numTexGen.numTexGens,
xfregs.numChan.numColorChans, xfregs.dualTexTrans.enabled, bpmem.ztex2.op, xfregs.numChan.numColorChans, xfregs.dualTexTrans.enabled, bpmem.ztex2.op,
@ -136,7 +132,7 @@ void VertexManager::vFlush()
(void)GL_REPORT_ERROR(); (void)GL_REPORT_ERROR();
//glBindBuffer(GL_ARRAY_BUFFER, s_vboBuffers[s_nCurVBOIndex]); //glBindBuffer(GL_ARRAY_BUFFER, s_vboBuffers[s_nCurVBOIndex]);
//glBufferData(GL_ARRAY_BUFFER, s_pCurBufferPointer - LocalVBuffer, LocalVBuffer, GL_STREAM_DRAW); //glBufferData(GL_ARRAY_BUFFER, s_pCurBufferPointer - s_pBaseBufferPointer, s_pBaseBufferPointer, GL_STREAM_DRAW);
GL_REPORT_ERRORD(); GL_REPORT_ERRORD();
// setup the pointers // setup the pointers
@ -244,8 +240,6 @@ void VertexManager::vFlush()
GFX_DEBUGGER_PAUSE_AT(NEXT_FLUSH, true); GFX_DEBUGGER_PAUSE_AT(NEXT_FLUSH, true);
//s_nCurVBOIndex = (s_nCurVBOIndex + 1) % ARRAYSIZE(s_vboBuffers); //s_nCurVBOIndex = (s_nCurVBOIndex + 1) % ARRAYSIZE(s_vboBuffers);
s_pCurBufferPointer = LocalVBuffer;
IndexGenerator::Start(TIBuffer,LIBuffer,PIBuffer);
#if defined(_DEBUG) || defined(DEBUGFAST) #if defined(_DEBUG) || defined(DEBUGFAST)
if (g_ActiveConfig.iLog & CONF_SAVESHADERS) if (g_ActiveConfig.iLog & CONF_SAVESHADERS)