Merge branch 'vertex-loader-cleanup'

This commit is contained in:
degasus 2013-03-06 14:08:02 +01:00
commit 8d5299c20b
18 changed files with 770 additions and 1732 deletions

View File

@ -172,6 +172,41 @@ inline u64 swap64(u64 data) {return ((u64)swap32(data) << 32) | swap32(data >> 3
inline u16 swap16(const u8* _pData) {return swap16(*(const u16*)_pData);}
inline u32 swap32(const u8* _pData) {return swap32(*(const u32*)_pData);}
inline u64 swap64(const u8* _pData) {return swap64(*(const u64*)_pData);}
template <int count>
void swap(u8*);
template <>
inline void swap<1>(u8* data)
{}
template <>
inline void swap<2>(u8* data)
{
*reinterpret_cast<u16*>(data) = swap16(data);
}
template <>
inline void swap<4>(u8* data)
{
*reinterpret_cast<u32*>(data) = swap32(data);
}
template <>
inline void swap<8>(u8* data)
{
*reinterpret_cast<u64*>(data) = swap64(data);
}
template <typename T>
inline T FromBigEndian(T data)
{
//static_assert(std::is_arithmetic<T>::value, "function only makes sense with arithmetic types");
swap<sizeof(data)>(reinterpret_cast<u8*>(&data));
return data;
}
} // Namespace Common
#endif // _COMMONFUNCS_H_

View File

@ -20,6 +20,8 @@
#ifndef _DATAREADER_H
#define _DATAREADER_H
#include "VertexManagerBase.h"
extern u8* g_pVideoData;
#if _M_SSE >= 0x301 && !(defined __GNUC__ && !defined __SSSE3__)
@ -31,43 +33,63 @@ __forceinline void DataSkip(u32 skip)
g_pVideoData += skip;
}
// probably unnecessary
template <int count>
__forceinline void DataSkip()
{
g_pVideoData += count;
}
template <typename T>
__forceinline T DataPeek(int _uOffset)
{
auto const result = Common::FromBigEndian(*reinterpret_cast<T*>(g_pVideoData + _uOffset));
return result;
}
// TODO: kill these
__forceinline u8 DataPeek8(int _uOffset)
{
return g_pVideoData[_uOffset];
return DataPeek<u8>(_uOffset);
}
__forceinline u16 DataPeek16(int _uOffset)
{
return Common::swap16(*(u16*)&g_pVideoData[_uOffset]);
return DataPeek<u16>(_uOffset);
}
__forceinline u32 DataPeek32(int _uOffset)
{
return Common::swap32(*(u32*)&g_pVideoData[_uOffset]);
return DataPeek<u32>(_uOffset);
}
template <typename T>
__forceinline T DataRead()
{
auto const result = DataPeek<T>(0);
DataSkip<sizeof(T)>();
return result;
}
// TODO: kill these
__forceinline u8 DataReadU8()
{
return *g_pVideoData++;
return DataRead<u8>();
}
__forceinline s8 DataReadS8()
{
return (s8)(*g_pVideoData++);
return DataRead<s8>();
}
__forceinline u16 DataReadU16()
{
u16 tmp = Common::swap16(*(u16*)g_pVideoData);
g_pVideoData += 2;
return tmp;
return DataRead<u16>();
}
__forceinline u32 DataReadU32()
{
u32 tmp = Common::swap32(*(u32*)g_pVideoData);
g_pVideoData += 4;
return tmp;
return DataRead<u32>();
}
typedef void (*DataReadU32xNfunc)(u32 *buf);
@ -120,58 +142,16 @@ __forceinline u32 DataReadU32Unswapped()
return tmp;
}
template<class T>
__forceinline T DataRead()
{
T tmp = *(T*)g_pVideoData;
g_pVideoData += sizeof(T);
return tmp;
}
template <>
__forceinline u16 DataRead()
{
u16 tmp = Common::swap16(*(u16*)g_pVideoData);
g_pVideoData += 2;
return tmp;
}
template <>
__forceinline s16 DataRead()
{
s16 tmp = (s16)Common::swap16(*(u16*)g_pVideoData);
g_pVideoData += 2;
return tmp;
}
template <>
__forceinline u32 DataRead()
{
u32 tmp = (u32)Common::swap32(*(u32*)g_pVideoData);
g_pVideoData += 4;
return tmp;
}
template <>
__forceinline s32 DataRead()
{
s32 tmp = (s32)Common::swap32(*(u32*)g_pVideoData);
g_pVideoData += 4;
return tmp;
}
__forceinline float DataReadF32()
{
union {u32 i; float f;} temp;
temp.i = Common::swap32(*(u32*)g_pVideoData);
g_pVideoData += 4;
float tmp = temp.f;
return tmp;
}
__forceinline u8* DataGetPosition()
{
return g_pVideoData;
}
template <typename T>
__forceinline void DataWrite(T data)
{
*(T*)VertexManager::s_pCurBufferPointer = data;
VertexManager::s_pCurBufferPointer += sizeof(T);
}
#endif

View File

@ -15,6 +15,9 @@
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#include <cstddef>
#include "Common.h"
#include "IndexGenerator.h"
/*
@ -27,24 +30,18 @@ QUAD simulator
*/
//Init
u16 *IndexGenerator::Tptr = 0;
u16 *IndexGenerator::BASETptr = 0;
u16 *IndexGenerator::Lptr = 0;
u16 *IndexGenerator::BASELptr = 0;
u16 *IndexGenerator::Pptr = 0;
u16 *IndexGenerator::BASEPptr = 0;
int IndexGenerator::numT = 0;
int IndexGenerator::numL = 0;
int IndexGenerator::numP = 0;
int IndexGenerator::index = 0;
int IndexGenerator::Tadds = 0;
int IndexGenerator::Ladds = 0;
int IndexGenerator::Padds = 0;
IndexGenerator::IndexPrimitiveType IndexGenerator::LastTPrimitive = Prim_None;
IndexGenerator::IndexPrimitiveType IndexGenerator::LastLPrimitive = Prim_None;
bool IndexGenerator::used = false;
u16 *IndexGenerator::Tptr;
u16 *IndexGenerator::BASETptr;
u16 *IndexGenerator::Lptr;
u16 *IndexGenerator::BASELptr;
u16 *IndexGenerator::Pptr;
u16 *IndexGenerator::BASEPptr;
u32 IndexGenerator::numT;
u32 IndexGenerator::numL;
u32 IndexGenerator::numP;
u32 IndexGenerator::index;
void IndexGenerator::Start(u16 *Triangleptr,u16 *Lineptr,u16 *Pointptr)
void IndexGenerator::Start(u16* Triangleptr, u16* Lineptr, u16* Pointptr)
{
Tptr = Triangleptr;
Lptr = Lineptr;
@ -56,288 +53,116 @@ void IndexGenerator::Start(u16 *Triangleptr,u16 *Lineptr,u16 *Pointptr)
numT = 0;
numL = 0;
numP = 0;
Tadds = 0;
Ladds = 0;
Padds = 0;
LastTPrimitive = Prim_None;
LastLPrimitive = Prim_None;
}
void IndexGenerator::AddIndices(int primitive, u32 numVerts)
{
//switch (primitive)
//{
//case GX_DRAW_QUADS: IndexGenerator::AddQuads(numVerts); break;
//case GX_DRAW_TRIANGLES: IndexGenerator::AddList(numVerts); break;
//case GX_DRAW_TRIANGLE_STRIP: IndexGenerator::AddStrip(numVerts); break;
//case GX_DRAW_TRIANGLE_FAN: IndexGenerator::AddFan(numVerts); break;
//case GX_DRAW_LINES: IndexGenerator::AddLineList(numVerts); break;
//case GX_DRAW_LINE_STRIP: IndexGenerator::AddLineStrip(numVerts); break;
//case GX_DRAW_POINTS: IndexGenerator::AddPoints(numVerts); break;
//}
static void (*const primitive_table[])(u32) =
{
IndexGenerator::AddQuads,
NULL,
IndexGenerator::AddList,
IndexGenerator::AddStrip,
IndexGenerator::AddFan,
IndexGenerator::AddLineList,
IndexGenerator::AddLineStrip,
IndexGenerator::AddPoints,
};
primitive_table[primitive](numVerts);
index += numVerts;
}
// Triangles
void IndexGenerator::AddList(int numVerts)
__forceinline void IndexGenerator::WriteTriangle(u32 index1, u32 index2, u32 index3)
{
//if we have no vertices return
if(numVerts <= 0) return;
int numTris = numVerts / 3;
if (!numTris)
{
//if we have less than 3 verts
if(numVerts == 1)
{
// discard
index++;
return;
}
else
{
//we have two verts render a degenerated triangle
numTris = 1;
*Tptr++ = index;
*Tptr++ = index+1;
*Tptr++ = index;
}
}
else
{
for (int i = 0; i < numTris; i++)
{
*Tptr++ = index+i*3;
*Tptr++ = index+i*3+1;
*Tptr++ = index+i*3+2;
}
int baseRemainingverts = numVerts - numVerts % 3;
switch (numVerts % 3)
{
case 2:
//whe have 2 remaining verts use strip method
*Tptr++ = index + baseRemainingverts - 1;
*Tptr++ = index + baseRemainingverts;
*Tptr++ = index + baseRemainingverts + 1;
numTris++;
break;
case 1:
//whe have 1 remaining verts use strip method this is only a conjeture
*Tptr++ = index + baseRemainingverts - 2;
*Tptr++ = index + baseRemainingverts - 1;
*Tptr++ = index + baseRemainingverts;
numTris++;
break;
default:
break;
};
}
index += numVerts;
numT += numTris;
Tadds++;
LastTPrimitive = Prim_List;
*Tptr++ = index1;
*Tptr++ = index2;
*Tptr++ = index3;
++numT;
}
void IndexGenerator::AddStrip(int numVerts)
void IndexGenerator::AddList(u32 const numVerts)
{
if(numVerts <= 0) return;
int numTris = numVerts - 2;
if (numTris < 1)
auto const numTris = numVerts / 3;
for (u32 i = 0; i != numTris; ++i)
{
//if we have less than 3 verts
if(numVerts == 1)
{
// discard
index++;
return;
}
else
{
//we have two verts render a degenerated triangle
numTris = 1;
*Tptr++ = index;
*Tptr++ = index+1;
*Tptr++ = index;
}
WriteTriangle(index + i * 3, index + i * 3 + 1, index + i * 3 + 2);
}
else
{
bool wind = false;
for (int i = 0; i < numTris; i++)
{
*Tptr++ = index+i;
*Tptr++ = index+i+(wind?2:1);
*Tptr++ = index+i+(wind?1:2);
wind = !wind;
}
}
index += numVerts;
numT += numTris;
Tadds++;
LastTPrimitive = Prim_Strip;
}
void IndexGenerator::AddFan(int numVerts)
{
if(numVerts <= 0) return;
int numTris = numVerts - 2;
if (numTris < 1)
{
//if we have less than 3 verts
if(numVerts == 1)
{
//Discard
index++;
return;
}
else
{
//we have two verts render a degenerated triangle
numTris = 1;
*Tptr++ = index;
*Tptr++ = index+1;
*Tptr++ = index;
}
}
else
{
for (int i = 0; i < numTris; i++)
{
*Tptr++ = index;
*Tptr++ = index+i+1;
*Tptr++ = index+i+2;
}
}
index += numVerts;
numT += numTris;
Tadds++;
LastTPrimitive = Prim_Fan;
}
void IndexGenerator::AddQuads(int numVerts)
void IndexGenerator::AddStrip(u32 const numVerts)
{
if(numVerts <= 0) return;
int numTris = (numVerts/4)*2;
if (numTris == 0)
bool wind = false;
for (u32 i = 2; i < numVerts; ++i)
{
//if we have less than 3 verts
if(numVerts == 1)
{
//discard
index++;
return;
}
else
{
if(numVerts == 2)
{
//we have two verts render a degenerated triangle
numTris = 1;
*Tptr++ = index;
*Tptr++ = index + 1;
*Tptr++ = index;
}
else
{
//we have 3 verts render a full triangle
numTris = 1;
*Tptr++ = index;
*Tptr++ = index + 1;
*Tptr++ = index + 2;
}
}
WriteTriangle(
index + i - 2,
index + i - !wind,
index + i - wind);
wind ^= true;
}
else
{
for (int i = 0; i < numTris / 2; i++)
{
*Tptr++ = index+i*4;
*Tptr++ = index+i*4+1;
*Tptr++ = index+i*4+2;
*Tptr++ = index+i*4;
*Tptr++ = index+i*4+2;
*Tptr++ = index+i*4+3;
}
int baseRemainingverts = numVerts - numVerts % 4;
switch (numVerts % 4)
{
case 3:
//whe have 3 remaining verts use strip method
*Tptr++ = index + baseRemainingverts;
*Tptr++ = index + baseRemainingverts + 1;
*Tptr++ = index + baseRemainingverts + 2;
numTris++;
break;
case 2:
//whe have 2 remaining verts use strip method
*Tptr++ = index + baseRemainingverts - 1;
*Tptr++ = index + baseRemainingverts;
*Tptr++ = index + baseRemainingverts + 1;
numTris++;
break;
case 1:
//whe have 1 remaining verts use strip method this is only a conjeture
*Tptr++ = index + baseRemainingverts - 2;
*Tptr++ = index + baseRemainingverts - 1;
*Tptr++ = index + baseRemainingverts;
numTris++;
break;
default:
break;
};
}
index += numVerts;
numT += numTris;
Tadds++;
LastTPrimitive = Prim_List;
}
//Lines
void IndexGenerator::AddLineList(int numVerts)
void IndexGenerator::AddFan(u32 numVerts)
{
if(numVerts <= 0) return;
int numLines = numVerts / 2;
if (!numLines)
for (u32 i = 2; i < numVerts; ++i)
{
//Discard
index++;
return;
WriteTriangle(index, index + i - 1, index + i);
}
else
{
for (int i = 0; i < numLines; i++)
{
*Lptr++ = index+i*2;
*Lptr++ = index+i*2+1;
}
if((numVerts & 1) != 0)
{
//use line strip for remaining vert
*Lptr++ = index + numLines * 2 - 1;
*Lptr++ = index + numLines * 2;
}
}
index += numVerts;
numL += numLines;
Ladds++;
LastLPrimitive = Prim_List;
}
void IndexGenerator::AddLineStrip(int numVerts)
void IndexGenerator::AddQuads(u32 numVerts)
{
int numLines = numVerts - 1;
if (numLines <= 0)
auto const numQuads = numVerts / 4;
for (u32 i = 0; i != numQuads; ++i)
{
if(numVerts == 1)
{
index++;
}
return;
WriteTriangle(index + i * 4, index + i * 4 + 1, index + i * 4 + 2);
WriteTriangle(index + i * 4, index + i * 4 + 2, index + i * 4 + 3);
}
for (int i = 0; i < numLines; i++)
{
*Lptr++ = index+i;
*Lptr++ = index+i+1;
}
index += numVerts;
numL += numLines;
Ladds++;
LastLPrimitive = Prim_Strip;
}
//Points
void IndexGenerator::AddPoints(int numVerts)
// Lines
void IndexGenerator::AddLineList(u32 numVerts)
{
for (int i = 0; i < numVerts; i++)
auto const numLines = numVerts / 2;
for (u32 i = 0; i != numLines; ++i)
{
*Pptr++ = index+i;
*Lptr++ = index + i * 2;
*Lptr++ = index + i * 2 + 1;
++numL;
}
}
void IndexGenerator::AddLineStrip(u32 numVerts)
{
for (u32 i = 1; i < numVerts; ++i)
{
*Lptr++ = index + i - 1;
*Lptr++ = index + i;
++numL;
}
}
// Points
void IndexGenerator::AddPoints(u32 numVerts)
{
for (u32 i = 0; i != numVerts; ++i)
{
*Pptr++ = index + i;
++numP;
}
index += numVerts;
numP += numVerts;
Padds++;
}

View File

@ -25,53 +25,58 @@
class IndexGenerator
{
public:
//Init
// Init
static void Start(u16 *Triangleptr,u16 *Lineptr,u16 *Pointptr);
//Triangles
static void AddList(int numVerts);
static void AddStrip(int numVerts);
static void AddFan(int numVerts);
static void AddQuads(int numVerts);
//Lines
static void AddLineList(int numVerts);
static void AddLineStrip(int numVerts);
//Points
static void AddPoints(int numVerts);
//Interface
static int GetNumTriangles() {used = true; return numT;}
static int GetNumLines() {used = true;return numL;}
static int GetNumPoints() {used = true;return numP;}
static int GetNumVerts() {return index;} //returns numprimitives
static int GetNumAdds() {return Tadds + Ladds + Padds;}
static int GetTriangleindexLen() {return (int)(Tptr - BASETptr);}
static int GetLineindexLen() {return (int)(Lptr - BASELptr);}
static int GetPointindexLen() {return (int)(Pptr - BASEPptr);}
static void AddIndices(int primitive, u32 numVertices);
// Interface
static u32 GetNumTriangles() {return numT;}
static u32 GetNumLines() {return numL;}
static u32 GetNumPoints() {return numP;}
// returns numprimitives
static u32 GetNumVerts() {return index;}
static u32 GetTriangleindexLen() {return (u32)(Tptr - BASETptr);}
static u32 GetLineindexLen() {return (u32)(Lptr - BASELptr);}
static u32 GetPointindexLen() {return (u32)(Pptr - BASEPptr);}
/*
enum IndexPrimitiveType
{
Prim_None = 0,
Prim_List,
Prim_Strip,
Prim_Fan
} ;
};
*/
private:
// Triangles
static void AddList(u32 numVerts);
static void AddStrip(u32 numVerts);
static void AddFan(u32 numVerts);
static void AddQuads(u32 numVerts);
// Lines
static void AddLineList(u32 numVerts);
static void AddLineStrip(u32 numVerts);
// Points
static void AddPoints(u32 numVerts);
static void WriteTriangle(u32 index1, u32 index2, u32 index3);
static u16 *Tptr;
static u16 *BASETptr;
static u16 *Lptr;
static u16 *BASELptr;
static u16 *Pptr;
static u16 *BASEPptr;
static int numT;
static int numL;
static int numP;
static int index;
static int Tadds;
static int Ladds;
static int Padds;
static IndexPrimitiveType LastTPrimitive;
static IndexPrimitiveType LastLPrimitive;
static bool used;
// TODO: redundant variables
static u32 numT;
static u32 numL;
static u32 numP;
static u32 index;
};
#endif // _INDEXGENERATOR_H

View File

@ -73,6 +73,10 @@ int colElements[2];
float posScale;
float tcScale[8];
// bbox must read vertex position, so convert it to this buffer
static float s_bbox_vertex_buffer[3];
static u8 *s_bbox_pCurBufferPointer_orig;
static const float fractionTable[32] = {
1.0f / (1U << 0), 1.0f / (1U << 1), 1.0f / (1U << 2), 1.0f / (1U << 3),
1.0f / (1U << 4), 1.0f / (1U << 5), 1.0f / (1U << 6), 1.0f / (1U << 7),
@ -95,10 +99,21 @@ void LOADERDECL PosMtx_ReadDirect_UByte()
void LOADERDECL PosMtx_Write()
{
*VertexManager::s_pCurBufferPointer++ = s_curposmtx;
*VertexManager::s_pCurBufferPointer++ = 0;
*VertexManager::s_pCurBufferPointer++ = 0;
*VertexManager::s_pCurBufferPointer++ = 0;
DataWrite<u8>(s_curposmtx);
DataWrite<u8>(0);
DataWrite<u8>(0);
DataWrite<u8>(0);
}
void LOADERDECL UpdateBoundingBoxPrepare()
{
if (!PixelEngine::bbox_active)
return;
// set our buffer as videodata buffer, so we will get a copy of the vertex positions
// this is a big hack, but so we can use the same converting function then without bbox
s_bbox_pCurBufferPointer_orig = VertexManager::s_pCurBufferPointer;
VertexManager::s_pCurBufferPointer = (u8*)s_bbox_vertex_buffer;
}
void LOADERDECL UpdateBoundingBox()
@ -106,12 +121,16 @@ void LOADERDECL UpdateBoundingBox()
if (!PixelEngine::bbox_active)
return;
// Truly evil hack, reading backwards from the write pointer. If we were writing to write-only
// memory like we might have been with a D3D vertex buffer, this would have been a bad idea.
float *data = (float *)(VertexManager::s_pCurBufferPointer - 12);
// reset videodata pointer
VertexManager::s_pCurBufferPointer = s_bbox_pCurBufferPointer_orig;
// copy vertex pointers
memcpy(VertexManager::s_pCurBufferPointer, s_bbox_vertex_buffer, 12);
VertexManager::s_pCurBufferPointer += 12;
// We must transform the just loaded point by the current world and projection matrix - in software.
// Then convert to screen space and update the bounding box.
float p[3] = {data[0], data[1], data[2]};
float p[3] = {s_bbox_vertex_buffer[0], s_bbox_vertex_buffer[1], s_bbox_vertex_buffer[2]};
const float *world_matrix = (float*)xfmem + MatrixIndexA.PosNormalMtxIdx * 4;
const float *proj_matrix = &g_fProjectionMatrix[0];
@ -149,24 +168,22 @@ void LOADERDECL TexMtx_ReadDirect_UByte()
void LOADERDECL TexMtx_Write_Float()
{
*(float*)VertexManager::s_pCurBufferPointer = (float)s_curtexmtx[s_texmtxwrite++];
VertexManager::s_pCurBufferPointer += 4;
DataWrite(float(s_curtexmtx[s_texmtxwrite++]));
}
void LOADERDECL TexMtx_Write_Float2()
{
((float*)VertexManager::s_pCurBufferPointer)[0] = 0;
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)s_curtexmtx[s_texmtxwrite++];
VertexManager::s_pCurBufferPointer += 8;
DataWrite(0.f);
DataWrite(float(s_curtexmtx[s_texmtxwrite++]));
}
void LOADERDECL TexMtx_Write_Float4()
{
((float*)VertexManager::s_pCurBufferPointer)[0] = 0;
((float*)VertexManager::s_pCurBufferPointer)[1] = 0;
((float*)VertexManager::s_pCurBufferPointer)[2] = s_curtexmtx[s_texmtxwrite++];
((float*)VertexManager::s_pCurBufferPointer)[3] = 0; // Just to fill out with 0.
VertexManager::s_pCurBufferPointer += 16;
DataWrite(0.f);
DataWrite(0.f);
DataWrite(float(s_curtexmtx[s_texmtxwrite++]));
// Just to fill out with 0.
DataWrite(0.f);
}
VertexLoader::VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr)
@ -274,15 +291,16 @@ void VertexLoader::CompileVertexTranslator()
if (m_VtxDesc.Tex7MatIdx) {m_VertexSize += 1; m_NativeFmt->m_components |= VB_HAS_TEXMTXIDX7; WriteCall(TexMtx_ReadDirect_UByte); }
// Write vertex position loader
WriteCall(VertexLoader_Position::GetFunction(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements));
if(g_ActiveConfig.bUseBBox) {
WriteCall(UpdateBoundingBoxPrepare);
WriteCall(VertexLoader_Position::GetFunction(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements));
WriteCall(UpdateBoundingBox);
} else {
WriteCall(VertexLoader_Position::GetFunction(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements));
}
m_VertexSize += VertexLoader_Position::GetSize(m_VtxDesc.Position, m_VtxAttr.PosFormat, m_VtxAttr.PosElements);
nat_offset += 12;
// OK, so we just got a point. Let's go back and read it for the bounding box.
if(g_ActiveConfig.bUseBBox)
WriteCall(UpdateBoundingBox);
// Normals
vtx_decl.num_normals = 0;
if (m_VtxDesc.Normal != NOT_PRESENT)
@ -507,7 +525,8 @@ void VertexLoader::WriteSetVariable(int bits, void *address, OpArg value)
#endif
}
#endif
void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
int VertexLoader::SetupRunVertices(int vtx_attr_group, int primitive, int const count)
{
m_numLoadedVertices += count;
@ -526,7 +545,7 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
{
// if cull mode is none, ignore triangles and quads
DataSkip(count * m_VertexSize);
return;
return 0;
}
m_NativeFmt->EnableComponents(m_NativeFmt->m_components);
@ -550,157 +569,48 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
for (int i = 0; i < 2; i++)
colElements[i] = m_VtxAttr.color[i].Elements;
// if strips or fans, make sure all vertices can fit in buffer, otherwise flush
int granularity = 1;
switch (primitive) {
case 3: // strip .. hm, weird
case 4: // fan
if (VertexManager::GetRemainingSize() < 3 * native_stride)
VertexManager::Flush();
break;
case 6: // line strip
if (VertexManager::GetRemainingSize() < 2 * native_stride)
VertexManager::Flush();
break;
case 0: granularity = 4; break; // quads
case 2: granularity = 3; break; // tris
case 5: granularity = 2; break; // lines
}
VertexManager::PrepareForAdditionalData(primitive, count, native_stride);
int startv = 0, extraverts = 0;
int v = 0;
//int remainingVerts2 = VertexManager::GetRemainingVertices(primitive);
while (v < count)
{
int remainingVerts = VertexManager::GetRemainingSize() / native_stride;
//if (remainingVerts2 - v + startv < remainingVerts)
//remainingVerts = remainingVerts2 - v + startv;
if (remainingVerts < granularity) {
INCSTAT(stats.thisFrame.numBufferSplits);
// This buffer full - break current primitive and flush, to switch to the next buffer.
u8* plastptr = VertexManager::s_pCurBufferPointer;
if (v - startv > 0)
VertexManager::AddVertices(primitive, v - startv + extraverts);
VertexManager::Flush();
//remainingVerts2 = VertexManager::GetRemainingVertices(primitive);
// Why does this need to be so complicated?
switch (primitive) {
case 3: // triangle strip, copy last two vertices
// a little trick since we have to keep track of signs
if (v & 1) {
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-2*native_stride, native_stride);
memcpy_gc(VertexManager::s_pCurBufferPointer+native_stride, plastptr-native_stride*2, 2*native_stride);
VertexManager::s_pCurBufferPointer += native_stride*3;
extraverts = 3;
}
else {
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride*2, native_stride*2);
VertexManager::s_pCurBufferPointer += native_stride*2;
extraverts = 2;
}
break;
case 4: // tri fan, copy first and last vert
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride*(v-startv+extraverts), native_stride);
VertexManager::s_pCurBufferPointer += native_stride;
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride, native_stride);
VertexManager::s_pCurBufferPointer += native_stride;
extraverts = 2;
break;
case 6: // line strip
memcpy_gc(VertexManager::s_pCurBufferPointer, plastptr-native_stride, native_stride);
VertexManager::s_pCurBufferPointer += native_stride;
extraverts = 1;
break;
default:
extraverts = 0;
break;
}
startv = v;
}
int remainingPrims = remainingVerts / granularity;
remainingVerts = remainingPrims * granularity;
if (count - v < remainingVerts)
remainingVerts = count - v;
#ifdef USE_JIT
if (remainingVerts > 0) {
loop_counter = remainingVerts;
((void (*)())(void*)m_compiledCode)();
}
#else
for (int s = 0; s < remainingVerts; s++)
{
tcIndex = 0;
colIndex = 0;
s_texmtxwrite = s_texmtxread = 0;
for (int i = 0; i < m_numPipelineStages; i++)
m_PipelineStages[i]();
PRIM_LOG("\n");
}
#endif
v += remainingVerts;
}
if (startv < count)
VertexManager::AddVertices(primitive, count - startv + extraverts);
return count;
}
void VertexLoader::RunCompiledVertices(int vtx_attr_group, int primitive, int count, u8* Data)
void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int const count)
{
m_numLoadedVertices += count;
// Flush if our vertex format is different from the currently set.
if (g_nativeVertexFmt != NULL && g_nativeVertexFmt != m_NativeFmt)
{
// We really must flush here. It's possible that the native representations
// of the two vtx formats are the same, but we have no way to easily check that
// now.
VertexManager::Flush();
// Also move the Set() here?
}
g_nativeVertexFmt = m_NativeFmt;
if (bpmem.genMode.cullmode == 3 && primitive < 5)
{
// if cull mode is none, ignore triangles and quads
DataSkip(count * m_VertexSize);
return;
}
m_NativeFmt->EnableComponents(m_NativeFmt->m_components);
// Load position and texcoord scale factors.
m_VtxAttr.PosFrac = g_VtxAttr[vtx_attr_group].g0.PosFrac;
m_VtxAttr.texCoord[0].Frac = g_VtxAttr[vtx_attr_group].g0.Tex0Frac;
m_VtxAttr.texCoord[1].Frac = g_VtxAttr[vtx_attr_group].g1.Tex1Frac;
m_VtxAttr.texCoord[2].Frac = g_VtxAttr[vtx_attr_group].g1.Tex2Frac;
m_VtxAttr.texCoord[3].Frac = g_VtxAttr[vtx_attr_group].g1.Tex3Frac;
m_VtxAttr.texCoord[4].Frac = g_VtxAttr[vtx_attr_group].g2.Tex4Frac;
m_VtxAttr.texCoord[5].Frac = g_VtxAttr[vtx_attr_group].g2.Tex5Frac;
m_VtxAttr.texCoord[6].Frac = g_VtxAttr[vtx_attr_group].g2.Tex6Frac;
m_VtxAttr.texCoord[7].Frac = g_VtxAttr[vtx_attr_group].g2.Tex7Frac;
pVtxAttr = &m_VtxAttr;
posScale = fractionTable[m_VtxAttr.PosFrac];
if (m_NativeFmt->m_components & VB_HAS_UVALL)
for (int i = 0; i < 8; i++)
tcScale[i] = fractionTable[m_VtxAttr.texCoord[i].Frac];
for (int i = 0; i < 2; i++)
colElements[i] = m_VtxAttr.color[i].Elements;
if(VertexManager::GetRemainingSize() < native_stride * count)
VertexManager::Flush();
memcpy_gc(VertexManager::s_pCurBufferPointer, Data, native_stride * count);
VertexManager::s_pCurBufferPointer += native_stride * count;
DataSkip(count * m_VertexSize);
VertexManager::AddVertices(primitive, count);
auto const new_count = SetupRunVertices(vtx_attr_group, primitive, count);
ConvertVertices(new_count);
VertexManager::AddVertices(primitive, new_count);
}
void VertexLoader::ConvertVertices ( int count )
{
#ifdef USE_JIT
if (count > 0) {
loop_counter = count;
((void (*)())(void*)m_compiledCode)();
}
#else
for (int s = 0; s < count; s++)
{
tcIndex = 0;
colIndex = 0;
s_texmtxwrite = s_texmtxread = 0;
for (int i = 0; i < m_numPipelineStages; i++)
m_PipelineStages[i]();
PRIM_LOG("\n");
}
#endif
}
void VertexLoader::RunCompiledVertices(int vtx_attr_group, int primitive, int const count, u8* Data)
{
auto const new_count = SetupRunVertices(vtx_attr_group, primitive, count);
memcpy_gc(VertexManager::s_pCurBufferPointer, Data, native_stride * new_count);
VertexManager::s_pCurBufferPointer += native_stride * new_count;
DataSkip(new_count * m_VertexSize);
VertexManager::AddVertices(primitive, new_count);
}
void VertexLoader::SetVAT(u32 _group0, u32 _group1, u32 _group2)
{

View File

@ -88,6 +88,8 @@ public:
~VertexLoader();
int GetVertexSize() const {return m_VertexSize;}
int SetupRunVertices(int vtx_attr_group, int primitive, int const count);
void RunVertices(int vtx_attr_group, int primitive, int count);
void RunCompiledVertices(int vtx_attr_group, int primitive, int count, u8* Data);
@ -124,6 +126,7 @@ private:
void SetVAT(u32 _group0, u32 _group1, u32 _group2);
void CompileVertexTranslator();
void ConvertVertices(int count);
void WriteCall(TPipelineFunction);

View File

@ -15,9 +15,6 @@
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#ifndef _VERTEXLOADERCOLOR_H
#define _VERTEXLOADERCOLOR_H
#include "Common.h"
#include "VideoCommon.h"
#include "LookUpTables.h"
@ -37,8 +34,7 @@ extern int colElements[2];
__forceinline void _SetCol(u32 val)
{
*(u32*)VertexManager::s_pCurBufferPointer = val;
VertexManager::s_pCurBufferPointer += 4;
DataWrite(val);
colIndex++;
}
@ -132,80 +128,65 @@ void LOADERDECL Color_ReadDirect_32b_8888()
_SetCol(col);
}
void LOADERDECL Color_ReadIndex8_16b_565()
template <typename I>
void Color_ReadIndex_16b_565()
{
u8 Index = DataReadU8();
auto const Index = DataRead<I>();
u16 val = Common::swap16(*(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex])));
_SetCol565(val);
}
void LOADERDECL Color_ReadIndex8_24b_888()
template <typename I>
void Color_ReadIndex_24b_888()
{
u8 Index = DataReadU8();
auto const Index = DataRead<I>();
const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
_SetCol(_Read24(iAddress));
}
void LOADERDECL Color_ReadIndex8_32b_888x()
template <typename I>
void Color_ReadIndex_32b_888x()
{
u8 Index = DataReadU8();
auto const Index = DataRead<I>();
const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
_SetCol(_Read24(iAddress));
}
void LOADERDECL Color_ReadIndex8_16b_4444()
template <typename I>
void Color_ReadIndex_16b_4444()
{
u8 Index = DataReadU8();
auto const Index = DataRead<I>();
u16 val = *(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]));
_SetCol4444(val);
}
void LOADERDECL Color_ReadIndex8_24b_6666()
template <typename I>
void Color_ReadIndex_24b_6666()
{
u8 Index = DataReadU8();
auto const Index = DataRead<I>();
const u8* pData = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]) - 1;
u32 val = Common::swap32(pData);
_SetCol6666(val);
}
void LOADERDECL Color_ReadIndex8_32b_8888()
template <typename I>
void Color_ReadIndex_32b_8888()
{
u8 Index = DataReadU8();
auto const Index = DataRead<I>();
const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
_SetCol(_Read32(iAddress));
}
void LOADERDECL Color_ReadIndex16_16b_565()
{
u16 Index = DataReadU16();
u16 val = Common::swap16(*(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex])));
_SetCol565(val);
}
void LOADERDECL Color_ReadIndex16_24b_888()
{
u16 Index = DataReadU16();
const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
_SetCol(_Read24(iAddress));
}
void LOADERDECL Color_ReadIndex16_32b_888x()
{
u16 Index = DataReadU16();
const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
_SetCol(_Read24(iAddress));
}
void LOADERDECL Color_ReadIndex16_16b_4444()
{
u16 Index = DataReadU16();
u16 val = *(const u16 *)(cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]));
_SetCol4444(val);
}
void LOADERDECL Color_ReadIndex16_24b_6666()
{
u16 Index = DataReadU16();
const u8 *pData = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]) - 1;
u32 val = Common::swap32(pData);
_SetCol6666(val);
}
void LOADERDECL Color_ReadIndex16_32b_8888()
{
u16 Index = DataReadU16();
const u8 *iAddress = cached_arraybases[ARRAY_COLOR+colIndex] + (Index * arraystrides[ARRAY_COLOR+colIndex]);
_SetCol(_Read32(iAddress));
}
#endif
void LOADERDECL Color_ReadIndex8_16b_565() { Color_ReadIndex_16b_565<u8>(); }
void LOADERDECL Color_ReadIndex8_24b_888() { Color_ReadIndex_24b_888<u8>(); }
void LOADERDECL Color_ReadIndex8_32b_888x() { Color_ReadIndex_32b_888x<u8>(); }
void LOADERDECL Color_ReadIndex8_16b_4444() { Color_ReadIndex_16b_4444<u8>(); }
void LOADERDECL Color_ReadIndex8_24b_6666() { Color_ReadIndex_24b_6666<u8>(); }
void LOADERDECL Color_ReadIndex8_32b_8888() { Color_ReadIndex_32b_8888<u8>(); }
void LOADERDECL Color_ReadIndex16_16b_565() { Color_ReadIndex_16b_565<u16>(); }
void LOADERDECL Color_ReadIndex16_24b_888() { Color_ReadIndex_24b_888<u16>(); }
void LOADERDECL Color_ReadIndex16_32b_888x() { Color_ReadIndex_32b_888x<u16>(); }
void LOADERDECL Color_ReadIndex16_16b_4444() { Color_ReadIndex_16b_4444<u16>(); }
void LOADERDECL Color_ReadIndex16_24b_6666() { Color_ReadIndex_24b_6666<u16>(); }
void LOADERDECL Color_ReadIndex16_32b_8888() { Color_ReadIndex_32b_8888<u16>(); }

View File

@ -22,6 +22,7 @@
#include "VertexManagerBase.h"
#include "CPUDetect.h"
#include <cmath>
#include <limits>
#if _M_SSE >= 0x401
#include <smmintrin.h>
@ -30,78 +31,163 @@
#include <tmmintrin.h>
#endif
// warning: mapping buffer should be disabled to use this
#define LOG_NORM() // PRIM_LOG("norm: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-3], ((float*)VertexManager::s_pCurBufferPointer)[-2], ((float*)VertexManager::s_pCurBufferPointer)[-1]);
VertexLoader_Normal::Set VertexLoader_Normal::m_Table[NUM_NRM_TYPE][NUM_NRM_INDICES][NUM_NRM_ELEMENTS][NUM_NRM_FORMAT];
namespace
{
template <typename T>
__forceinline float FracAdjust(T val)
{
//auto const S8FRAC = 1.f / (1u << 6);
//auto const U8FRAC = 1.f / (1u << 7);
//auto const S16FRAC = 1.f / (1u << 14);
//auto const U16FRAC = 1.f / (1u << 15);
// TODO: is this right?
return val / float(1u << (sizeof(T) * 8 - std::numeric_limits<T>::is_signed - 1));
}
template <>
__forceinline float FracAdjust(float val)
{ return val; }
template <typename T, int N>
__forceinline void ReadIndirect(const T* data)
{
static_assert(3 == N || 9 == N, "N is only sane as 3 or 9!");
for (int i = 0; i != N; ++i)
{
DataWrite(FracAdjust(Common::FromBigEndian(data[i])));
}
LOG_NORM();
}
template <typename T, int N>
struct Normal_Direct
{
static void LOADERDECL function()
{
auto const source = reinterpret_cast<const T*>(DataGetPosition());
ReadIndirect<T, N * 3>(source);
DataSkip<N * 3 * sizeof(T)>();
}
static const int size = sizeof(T) * N * 3;
};
template <typename I, typename T, int N, int Offset>
__forceinline void Normal_Index_Offset()
{
static_assert(!std::numeric_limits<I>::is_signed, "Only unsigned I is sane!");
auto const index = DataRead<I>();
auto const data = reinterpret_cast<const T*>(cached_arraybases[ARRAY_NORMAL]
+ (index * arraystrides[ARRAY_NORMAL]) + sizeof(T) * 3 * Offset);
ReadIndirect<T, N * 3>(data);
}
template <typename I, typename T, int N>
struct Normal_Index
{
static void LOADERDECL function()
{
Normal_Index_Offset<I, T, N, 0>();
}
static const int size = sizeof(I);
};
template <typename I, typename T>
struct Normal_Index_Indices3
{
static void LOADERDECL function()
{
Normal_Index_Offset<I, T, 1, 0>();
Normal_Index_Offset<I, T, 1, 1>();
Normal_Index_Offset<I, T, 1, 2>();
}
static const int size = sizeof(I) * 3;
};
}
void VertexLoader_Normal::Init(void)
{
// HACK is for signed instead of unsigned to prevent crashes.
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Set(3, Normal_DirectByte); //HACK
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Set(3, Normal_DirectByte);
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Set(6, Normal_DirectShort); //HACK
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Set(6, Normal_DirectShort);
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Set(12, Normal_DirectFloat);
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Set(9, Normal_DirectByte3); //HACK
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Set(9, Normal_DirectByte3);
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Set(18, Normal_DirectShort3); //HACK
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Set(18, Normal_DirectShort3);
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Set(36, Normal_DirectFloat3);
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Normal_Direct<u8, 1>();
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Normal_Direct<s8, 1>();
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Normal_Direct<u16, 1>();
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Normal_Direct<s16, 1>();
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Normal_Direct<float, 1>();
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Normal_Direct<u8, 3>();
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Normal_Direct<s8, 3>();
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Normal_Direct<u16, 3>();
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Normal_Direct<s16, 3>();
m_Table[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Normal_Direct<float, 3>();
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Set(3, Normal_DirectByte); //HACK
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Set(3, Normal_DirectByte);
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Set(6, Normal_DirectShort); //HACK
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Set(6, Normal_DirectShort);
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Set(12, Normal_DirectFloat);
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Set(9, Normal_DirectByte3); //HACK
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Set(9, Normal_DirectByte3);
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Set(18, Normal_DirectShort3); //HACK
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Set(18, Normal_DirectShort3);
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Set(36, Normal_DirectFloat3);
// Same as above
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Normal_Direct<u8, 1>();
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Normal_Direct<s8, 1>();
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Normal_Direct<u16, 1>();
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Normal_Direct<s16, 1>();
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Normal_Direct<float, 1>();
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Normal_Direct<u8, 3>();
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Normal_Direct<s8, 3>();
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Direct<u16, 3>();
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Direct<s16, 3>();
m_Table[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Direct<float, 3>();
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Set(1, Normal_Index8_Byte); //HACK
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Set(1, Normal_Index8_Byte);
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Set(1, Normal_Index8_Short); //HACK
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Set(1, Normal_Index8_Short);
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Set(1, Normal_Index8_Float);
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Set(1, Normal_Index8_Byte3_Indices1); //HACK
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Set(1, Normal_Index8_Byte3_Indices1);
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Set(1, Normal_Index8_Short3_Indices1); //HACK
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Set(1, Normal_Index8_Short3_Indices1);
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Set(1, Normal_Index8_Float3_Indices1);
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Normal_Index<u8, u8, 1>();
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Normal_Index<u8, s8, 1>();
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Normal_Index<u8, u16, 1>();
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Normal_Index<u8, s16, 1>();
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Normal_Index<u8, float, 1>();
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Normal_Index<u8, u8, 3>();
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Normal_Index<u8, s8, 3>();
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Normal_Index<u8, u16, 3>();
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Normal_Index<u8, s16, 3>();
m_Table[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Normal_Index<u8, float, 3>();
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Set(1, Normal_Index8_Byte); //HACK
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Set(1, Normal_Index8_Byte);
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Set(1, Normal_Index8_Short); //HACK
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Set(1, Normal_Index8_Short);
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Set(1, Normal_Index8_Float);
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Set(3, Normal_Index8_Byte3_Indices3); //HACK
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Set(3, Normal_Index8_Byte3_Indices3);
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Set(3, Normal_Index8_Short3_Indices3); //HACK
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Set(3, Normal_Index8_Short3_Indices3);
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Set(3, Normal_Index8_Float3_Indices3);
// Same as above for NRM_NBT
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Normal_Index<u8, u8, 1>();
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Normal_Index<u8, s8, 1>();
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Normal_Index<u8, u16, 1>();
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Normal_Index<u8, s16, 1>();
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Normal_Index<u8, float, 1>();
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Normal_Index_Indices3<u8, u8>();
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Normal_Index_Indices3<u8, s8>();
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Index_Indices3<u8, u16>();
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Index_Indices3<u8, s16>();
m_Table[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Index_Indices3<u8, float>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Set(2, Normal_Index16_Byte); //HACK
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Set(2, Normal_Index16_Byte);
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Set(2, Normal_Index16_Short); //HACK
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Set(2, Normal_Index16_Short);
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Set(2, Normal_Index16_Float);
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Set(2, Normal_Index16_Byte3_Indices1); //HACK
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Set(2, Normal_Index16_Byte3_Indices1);
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Set(2, Normal_Index16_Short3_Indices1); //HACK
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Set(2, Normal_Index16_Short3_Indices1);
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Set(2, Normal_Index16_Float3_Indices1);
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_UBYTE] = Normal_Index<u16, u8, 1>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Normal_Index<u16, s8, 1>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Normal_Index<u16, u16, 1>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Normal_Index<u16, s16, 1>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_FLOAT] = Normal_Index<u16, float, 1>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_UBYTE] = Normal_Index<u16, u8, 3>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Normal_Index<u16, s8, 3>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_USHORT] = Normal_Index<u16, u16, 3>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_SHORT] = Normal_Index<u16, s16, 3>();
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_FLOAT] = Normal_Index<u16, float, 3>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Set(2, Normal_Index16_Byte); //HACK
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Set(2, Normal_Index16_Byte);
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Set(2, Normal_Index16_Short); //HACK
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Set(2, Normal_Index16_Short);
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Set(2, Normal_Index16_Float);
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Set(6, Normal_Index16_Byte3_Indices3); //HACK
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Set(6, Normal_Index16_Byte3_Indices3);
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Set(6, Normal_Index16_Short3_Indices3); //HACK
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Set(6, Normal_Index16_Short3_Indices3);
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Set(6, Normal_Index16_Float3_Indices3);
// Same as above for NRM_NBT
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_UBYTE] = Normal_Index<u16, u8, 1>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Normal_Index<u16, s8, 1>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Normal_Index<u16, u16, 1>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Normal_Index<u16, s16, 1>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_FLOAT] = Normal_Index<u16, float, 1>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_UBYTE] = Normal_Index_Indices3<u16, u8>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Normal_Index_Indices3<u16, s8>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Normal_Index_Indices3<u16, u16>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Normal_Index_Indices3<u16, s16>();
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Normal_Index_Indices3<u16, float>();
}
unsigned int VertexLoader_Normal::GetSize(unsigned int _type,
@ -116,312 +202,3 @@ TPipelineFunction VertexLoader_Normal::GetFunction(unsigned int _type,
TPipelineFunction pFunc = m_Table[_type][_index3][_elements][_format].function;
return pFunc;
}
// This fracs are fixed acording to format
#define S8FRAC 0.015625f; // 1.0f / (1U << 6)
#define S16FRAC 0.00006103515625f; // 1.0f / (1U << 14)
// --- Direct ---
inline void ReadIndirectS8x3(const s8* pData)
{
((float*)VertexManager::s_pCurBufferPointer)[0] = pData[0] * S8FRAC;
((float*)VertexManager::s_pCurBufferPointer)[1] = pData[1] * S8FRAC;
((float*)VertexManager::s_pCurBufferPointer)[2] = pData[2] * S8FRAC;
VertexManager::s_pCurBufferPointer += 12;
LOG_NORM();
}
inline void ReadIndirectS8x9(const s8* pData)
{
((float*)VertexManager::s_pCurBufferPointer)[0] = pData[0] * S8FRAC;
((float*)VertexManager::s_pCurBufferPointer)[1] = pData[1] * S8FRAC;
((float*)VertexManager::s_pCurBufferPointer)[2] = pData[2] * S8FRAC;
LOG_NORM();
((float*)VertexManager::s_pCurBufferPointer)[3] = pData[3] * S8FRAC;
((float*)VertexManager::s_pCurBufferPointer)[4] = pData[4] * S8FRAC;
((float*)VertexManager::s_pCurBufferPointer)[5] = pData[5] * S8FRAC;
LOG_NORM();
((float*)VertexManager::s_pCurBufferPointer)[6] = pData[6] * S8FRAC;
((float*)VertexManager::s_pCurBufferPointer)[7] = pData[7] * S8FRAC;
((float*)VertexManager::s_pCurBufferPointer)[8] = pData[8] * S8FRAC;
LOG_NORM();
VertexManager::s_pCurBufferPointer += 36;
}
inline void ReadIndirectS16x3(const u16* pData)
{
((float*)VertexManager::s_pCurBufferPointer)[0] = ((s16)Common::swap16(pData[0])) * S16FRAC;
((float*)VertexManager::s_pCurBufferPointer)[1] = ((s16)Common::swap16(pData[1])) * S16FRAC;
((float*)VertexManager::s_pCurBufferPointer)[2] = ((s16)Common::swap16(pData[2])) * S16FRAC;
VertexManager::s_pCurBufferPointer += 12;
LOG_NORM()
}
inline void ReadIndirectS16x9(const u16* pData)
{
((float*)VertexManager::s_pCurBufferPointer)[0] = ((s16)Common::swap16(pData[0])) * S16FRAC;
((float*)VertexManager::s_pCurBufferPointer)[1] = ((s16)Common::swap16(pData[1])) * S16FRAC;
((float*)VertexManager::s_pCurBufferPointer)[2] = ((s16)Common::swap16(pData[2])) * S16FRAC;
LOG_NORM()
((float*)VertexManager::s_pCurBufferPointer)[3] = ((s16)Common::swap16(pData[3])) * S16FRAC;
((float*)VertexManager::s_pCurBufferPointer)[4] = ((s16)Common::swap16(pData[4])) * S16FRAC;
((float*)VertexManager::s_pCurBufferPointer)[5] = ((s16)Common::swap16(pData[5])) * S16FRAC;
LOG_NORM()
((float*)VertexManager::s_pCurBufferPointer)[6] = ((s16)Common::swap16(pData[6])) * S16FRAC;
((float*)VertexManager::s_pCurBufferPointer)[7] = ((s16)Common::swap16(pData[7])) * S16FRAC;
((float*)VertexManager::s_pCurBufferPointer)[8] = ((s16)Common::swap16(pData[8])) * S16FRAC;
LOG_NORM()
VertexManager::s_pCurBufferPointer += 36;
}
inline void ReadIndirectFloatx3(const u32* pData)
{
((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]);
((u32*)VertexManager::s_pCurBufferPointer)[1] = Common::swap32(pData[1]);
((u32*)VertexManager::s_pCurBufferPointer)[2] = Common::swap32(pData[2]);
VertexManager::s_pCurBufferPointer += 12;
LOG_NORM();
}
inline void ReadIndirectFloatx9(const u32* pData)
{
((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]);
((u32*)VertexManager::s_pCurBufferPointer)[1] = Common::swap32(pData[1]);
((u32*)VertexManager::s_pCurBufferPointer)[2] = Common::swap32(pData[2]);
LOG_NORM();
((u32*)VertexManager::s_pCurBufferPointer)[3] = Common::swap32(pData[3]);
((u32*)VertexManager::s_pCurBufferPointer)[4] = Common::swap32(pData[4]);
((u32*)VertexManager::s_pCurBufferPointer)[5] = Common::swap32(pData[5]);
LOG_NORM();
((u32*)VertexManager::s_pCurBufferPointer)[6] = Common::swap32(pData[6]);
((u32*)VertexManager::s_pCurBufferPointer)[7] = Common::swap32(pData[7]);
((u32*)VertexManager::s_pCurBufferPointer)[8] = Common::swap32(pData[8]);
LOG_NORM();
VertexManager::s_pCurBufferPointer += 36;
}
inline void ReadDirectS8x3()
{
const s8* Source = (const s8*)DataGetPosition();
ReadIndirectS8x3(Source);
DataSkip(3);
}
inline void ReadDirectS8x9()
{
const s8* Source = (const s8*)DataGetPosition();
ReadIndirectS8x9(Source);
DataSkip(9);
}
inline void ReadDirectS16x3()
{
const u16* Source = (const u16*)DataGetPosition();
ReadIndirectS16x3(Source);
DataSkip(6);
}
inline void ReadDirectS16x9()
{
const u16* Source = (const u16*)DataGetPosition();
ReadIndirectS16x9(Source);
DataSkip(18);
}
inline void ReadDirectFloatx3()
{
const u32* Source = (const u32*)DataGetPosition();
ReadIndirectFloatx3(Source);
DataSkip(12);
}
inline void ReadDirectFloatx9()
{
const u32* Source = (const u32*)DataGetPosition();
ReadIndirectFloatx9(Source);
DataSkip(36);
}
void LOADERDECL VertexLoader_Normal::Normal_DirectByte()
{
ReadDirectS8x3();
}
void LOADERDECL VertexLoader_Normal::Normal_DirectShort()
{
ReadDirectS16x3();
}
void LOADERDECL VertexLoader_Normal::Normal_DirectFloat()
{
ReadDirectFloatx3();
}
void LOADERDECL VertexLoader_Normal::Normal_DirectByte3()
{
ReadDirectS8x9();
}
void LOADERDECL VertexLoader_Normal::Normal_DirectShort3()
{
ReadDirectS16x9();
}
void LOADERDECL VertexLoader_Normal::Normal_DirectFloat3()
{
ReadDirectFloatx9();
}
// --- Index8 ---
void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte()
{
u8 Index = DataReadU8();
const s8* pData = (const s8 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]));
ReadIndirectS8x3(pData);
}
void LOADERDECL VertexLoader_Normal::Normal_Index8_Short()
{
u8 Index = DataReadU8();
const u16* pData = (const u16 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]));
ReadIndirectS16x3(pData);
}
void LOADERDECL VertexLoader_Normal::Normal_Index8_Float()
{
u8 Index = DataReadU8();
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]));
ReadIndirectFloatx3(pData);
}
void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte3_Indices1()
{
u8 Index = DataReadU8();
const s8* pData = (const s8 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]));
ReadIndirectS8x9(pData);
}
void LOADERDECL VertexLoader_Normal::Normal_Index8_Short3_Indices1()
{
u8 Index = DataReadU8();
const u16* pData = (const u16 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]));
ReadIndirectS16x9(pData);
}
void LOADERDECL VertexLoader_Normal::Normal_Index8_Float3_Indices1()
{
u8 Index = DataReadU8();
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]));
ReadIndirectFloatx9(pData);
}
void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte3_Indices3()
{
for (int i = 0; i < 3; i++)
{
u8 Index = DataReadU8();
const s8* pData = (const s8*)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 1*3*i);
ReadIndirectS8x3(pData);
}
}
void LOADERDECL VertexLoader_Normal::Normal_Index8_Short3_Indices3()
{
for (int i = 0; i < 3; i++)
{
u8 Index = DataReadU8();
const u16* pData = (const u16 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 2*3*i);
ReadIndirectS16x3(pData);
}
}
void LOADERDECL VertexLoader_Normal::Normal_Index8_Float3_Indices3()
{
for (int i = 0; i < 3; i++)
{
u8 Index = DataReadU8();
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 4*3*i);
ReadIndirectFloatx3(pData);
}
}
// --- Index16 ---
void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte()
{
u16 Index = DataReadU16();
const s8* pData = (const s8 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]));
ReadIndirectS8x3(pData);
}
void LOADERDECL VertexLoader_Normal::Normal_Index16_Short()
{
u16 Index = DataReadU16();
const u16* pData = (const u16 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]));
ReadIndirectS16x3(pData);
}
void LOADERDECL VertexLoader_Normal::Normal_Index16_Float()
{
u16 Index = DataReadU16();
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]));
ReadIndirectFloatx3(pData);
}
void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte3_Indices1()
{
u16 Index = DataReadU16();
const s8* pData = (const s8 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]));
ReadIndirectS8x9(pData);
}
void LOADERDECL VertexLoader_Normal::Normal_Index16_Short3_Indices1()
{
u16 Index = DataReadU16();
const u16* pData = (const u16 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]));
ReadIndirectS16x9(pData);
}
void LOADERDECL VertexLoader_Normal::Normal_Index16_Float3_Indices1()
{
u16 Index = DataReadU16();
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]));
ReadIndirectFloatx9(pData);
}
void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte3_Indices3()
{
for (int i = 0; i < 3; i++)
{
u16 Index = DataReadU16();
const s8* pData = (const s8*)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 1*3*i);
ReadIndirectS8x3(pData);
}
}
void LOADERDECL VertexLoader_Normal::Normal_Index16_Short3_Indices3()
{
for (int i = 0; i < 3; i++)
{
u16 Index = DataReadU16();
const u16* pData = (const u16 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 2*3*i);
ReadIndirectS16x3(pData);
}
}
void LOADERDECL VertexLoader_Normal::Normal_Index16_Float3_Indices3()
{
for (int i = 0; i < 3; i++)
{
u16 Index = DataReadU16();
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 4*3*i);
ReadIndirectFloatx3(pData);
}
}

View File

@ -70,45 +70,20 @@ private:
NUM_NRM_INDICES
};
struct Set {
Set() {}
Set(int gc_size_, TPipelineFunction function_) : gc_size(gc_size_), function(function_) {}
struct Set
{
template <typename T>
void operator=(const T&)
{
gc_size = T::size;
function = T::function;
}
int gc_size;
TPipelineFunction function;
// int pc_size;
};
static Set m_Table[NUM_NRM_TYPE][NUM_NRM_INDICES][NUM_NRM_ELEMENTS][NUM_NRM_FORMAT];
// direct
static void LOADERDECL Normal_DirectByte();
static void LOADERDECL Normal_DirectShort();
static void LOADERDECL Normal_DirectFloat();
static void LOADERDECL Normal_DirectByte3();
static void LOADERDECL Normal_DirectShort3();
static void LOADERDECL Normal_DirectFloat3();
// index8
static void LOADERDECL Normal_Index8_Byte();
static void LOADERDECL Normal_Index8_Short();
static void LOADERDECL Normal_Index8_Float();
static void LOADERDECL Normal_Index8_Byte3_Indices1();
static void LOADERDECL Normal_Index8_Short3_Indices1();
static void LOADERDECL Normal_Index8_Float3_Indices1();
static void LOADERDECL Normal_Index8_Byte3_Indices3();
static void LOADERDECL Normal_Index8_Short3_Indices3();
static void LOADERDECL Normal_Index8_Float3_Indices3();
// index16
static void LOADERDECL Normal_Index16_Byte();
static void LOADERDECL Normal_Index16_Short();
static void LOADERDECL Normal_Index16_Float();
static void LOADERDECL Normal_Index16_Byte3_Indices1();
static void LOADERDECL Normal_Index16_Short3_Indices1();
static void LOADERDECL Normal_Index16_Float3_Indices1();
static void LOADERDECL Normal_Index16_Byte3_Indices3();
static void LOADERDECL Normal_Index16_Short3_Indices3();
static void LOADERDECL Normal_Index16_Float3_Indices3();
};
#endif

View File

@ -15,6 +15,8 @@
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#include <limits>
#include "Common.h"
#include "VideoCommon.h"
#include "VertexLoader.h"
@ -71,101 +73,42 @@ MOVUPS(MOffset(EDI, 0), XMM0);
*/
// ==============================================================================
// Direct
// ==============================================================================
template <class T, bool three>
void Pos_ReadDirect()
template <typename T>
float PosScale(T val)
{
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(T)DataRead<T>() * posScale;
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(T)DataRead<T>() * posScale;
if (three)
((float*)VertexManager::s_pCurBufferPointer)[2] = (float)(T)DataRead<T>() * posScale;
else
((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f;
return val * posScale;
}
template <>
float PosScale(float val)
{ return val; }
template <typename T, int N>
void LOADERDECL Pos_ReadDirect()
{
static_assert(N <= 3, "N > 3 is not sane!");
for (int i = 0; i < 3; ++i)
DataWrite(i<N ? PosScale(DataRead<T>()) : 0.f);
LOG_VTX();
VertexManager::s_pCurBufferPointer += 12;
}
void LOADERDECL Pos_ReadDirect_UByte3() { Pos_ReadDirect<u8, true>(); }
void LOADERDECL Pos_ReadDirect_Byte3() { Pos_ReadDirect<s8, true>(); }
void LOADERDECL Pos_ReadDirect_UShort3() { Pos_ReadDirect<u16, true>(); }
void LOADERDECL Pos_ReadDirect_Short3() { Pos_ReadDirect<s16, true>(); }
void LOADERDECL Pos_ReadDirect_UByte2() { Pos_ReadDirect<u8, false>(); }
void LOADERDECL Pos_ReadDirect_Byte2() { Pos_ReadDirect<s8, false>(); }
void LOADERDECL Pos_ReadDirect_UShort2() { Pos_ReadDirect<u16, false>(); }
void LOADERDECL Pos_ReadDirect_Short2() { Pos_ReadDirect<s16, false>(); }
void LOADERDECL Pos_ReadDirect_Float3()
template <typename I, typename T, int N>
void LOADERDECL Pos_ReadIndex()
{
// No need to use floating point here.
((u32 *)VertexManager::s_pCurBufferPointer)[0] = DataReadU32();
((u32 *)VertexManager::s_pCurBufferPointer)[1] = DataReadU32();
((u32 *)VertexManager::s_pCurBufferPointer)[2] = DataReadU32();
LOG_VTX();
VertexManager::s_pCurBufferPointer += 12;
}
static_assert(!std::numeric_limits<I>::is_signed, "Only unsigned I is sane!");
static_assert(N <= 3, "N > 3 is not sane!");
void LOADERDECL Pos_ReadDirect_Float2()
{
// No need to use floating point here.
((u32 *)VertexManager::s_pCurBufferPointer)[0] = DataReadU32();
((u32 *)VertexManager::s_pCurBufferPointer)[1] = DataReadU32();
((u32 *)VertexManager::s_pCurBufferPointer)[2] = 0;
LOG_VTX();
VertexManager::s_pCurBufferPointer += 12;
}
template<class T, bool three,int MaxSize>
inline void Pos_ReadIndex_Byte(int Index)
{
if(Index < MaxSize)
auto const index = DataRead<I>();
if (index < std::numeric_limits<I>::max())
{
const u8* pData = cached_arraybases[ARRAY_POSITION] + ((u32)Index * arraystrides[ARRAY_POSITION]);
((float*)VertexManager::s_pCurBufferPointer)[0] = ((float)(T)(pData[0])) * posScale;
((float*)VertexManager::s_pCurBufferPointer)[1] = ((float)(T)(pData[1])) * posScale;
if (three)
((float*)VertexManager::s_pCurBufferPointer)[2] = ((float)(T)(pData[2])) * posScale;
else
((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f;
LOG_VTX();
VertexManager::s_pCurBufferPointer += 12;
}
}
auto const data = reinterpret_cast<const T*>(cached_arraybases[ARRAY_POSITION] + (index * arraystrides[ARRAY_POSITION]));
template<class T, bool three,int MaxSize>
inline void Pos_ReadIndex_Short(int Index)
{
if(Index < MaxSize)
{
const u16* pData = (const u16 *)(cached_arraybases[ARRAY_POSITION] + ((u32)Index * arraystrides[ARRAY_POSITION]));
((float*)VertexManager::s_pCurBufferPointer)[0] = ((float)(T)Common::swap16(pData[0])) * posScale;
((float*)VertexManager::s_pCurBufferPointer)[1] = ((float)(T)Common::swap16(pData[1])) * posScale;
if (three)
((float*)VertexManager::s_pCurBufferPointer)[2] = ((float)(T)Common::swap16(pData[2])) * posScale;
else
((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f;
LOG_VTX();
VertexManager::s_pCurBufferPointer += 12;
}
}
for (int i = 0; i < 3; ++i)
DataWrite(i<N ? PosScale(Common::FromBigEndian(data[i])) : 0.f);
template<bool three,int MaxSize>
void Pos_ReadIndex_Float(int Index)
{
if(Index < MaxSize)
{
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (Index * arraystrides[ARRAY_POSITION]));
((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]);
((u32*)VertexManager::s_pCurBufferPointer)[1] = Common::swap32(pData[1]);
if (three)
((u32*)VertexManager::s_pCurBufferPointer)[2] = Common::swap32(pData[2]);
else
((float*)VertexManager::s_pCurBufferPointer)[2] = 0.0f;
LOG_VTX();
VertexManager::s_pCurBufferPointer += 12;
}
}
@ -173,87 +116,22 @@ void Pos_ReadIndex_Float(int Index)
static const __m128i kMaskSwap32_3 = _mm_set_epi32(0xFFFFFFFFL, 0x08090A0BL, 0x04050607L, 0x00010203L);
static const __m128i kMaskSwap32_2 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x04050607L, 0x00010203L);
template<bool three,int MaxSize>
void Pos_ReadIndex_Float_SSSE3(int Index)
template <typename I, bool three>
void LOADERDECL Pos_ReadIndex_Float_SSSE3()
{
if(Index < MaxSize)
auto const index = DataRead<I>();
if (index < std::numeric_limits<I>::max())
{
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (Index * arraystrides[ARRAY_POSITION]));
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (index * arraystrides[ARRAY_POSITION]));
GC_ALIGNED128(const __m128i a = _mm_loadu_si128((__m128i*)pData));
GC_ALIGNED128(__m128i b = _mm_shuffle_epi8(a, three ? kMaskSwap32_3 : kMaskSwap32_2));
_mm_storeu_si128((__m128i*)VertexManager::s_pCurBufferPointer, b);
VertexManager::s_pCurBufferPointer += sizeof(float) * 3;
LOG_VTX();
VertexManager::s_pCurBufferPointer += 12;
}
}
#endif
// Explicitly instantiate these functions to decrease the possibility of
// symbol binding problems when (only) calling them from JIT compiled code.
template void Pos_ReadDirect<u8, true>();
template void Pos_ReadDirect<s8, true>();
template void Pos_ReadDirect<u16, true>();
template void Pos_ReadDirect<s16, true>();
template void Pos_ReadDirect<u8, false>();
template void Pos_ReadDirect<s8, false>();
template void Pos_ReadDirect<u16, false>();
template void Pos_ReadDirect<s16, false>();
template void Pos_ReadIndex_Byte<u8, true, 255>(int Index);
template void Pos_ReadIndex_Byte<s8, true, 255>(int Index);
template void Pos_ReadIndex_Short<u16, true, 255>(int Index);
template void Pos_ReadIndex_Short<s16, true, 255>(int Index);
template void Pos_ReadIndex_Float<true, 255>(int Index);
template void Pos_ReadIndex_Byte<u8, false, 255>(int Index);
template void Pos_ReadIndex_Byte<s8, false, 255>(int Index);
template void Pos_ReadIndex_Short<u16, false, 255>(int Index);
template void Pos_ReadIndex_Short<s16, false, 255>(int Index);
template void Pos_ReadIndex_Float<false, 255>(int Index);
template void Pos_ReadIndex_Byte<u8, true, 65535>(int Index);
template void Pos_ReadIndex_Byte<s8, true, 65535>(int Index);
template void Pos_ReadIndex_Short<u16, true, 65535>(int Index);
template void Pos_ReadIndex_Short<s16, true, 65535>(int Index);
template void Pos_ReadIndex_Float<true, 65535>(int Index);
template void Pos_ReadIndex_Byte<u8, false, 65535>(int Index);
template void Pos_ReadIndex_Byte<s8, false, 65535>(int Index);
template void Pos_ReadIndex_Short<u16, false, 65535>(int Index);
template void Pos_ReadIndex_Short<s16, false, 65535>(int Index);
template void Pos_ReadIndex_Float<false, 65535>(int Index);
// ==============================================================================
// Index 8
// ==============================================================================
void LOADERDECL Pos_ReadIndex8_UByte3() {Pos_ReadIndex_Byte<u8, true, 255> (DataReadU8());}
void LOADERDECL Pos_ReadIndex8_Byte3() {Pos_ReadIndex_Byte<s8, true, 255> (DataReadU8());}
void LOADERDECL Pos_ReadIndex8_UShort3() {Pos_ReadIndex_Short<u16, true, 255> (DataReadU8());}
void LOADERDECL Pos_ReadIndex8_Short3() {Pos_ReadIndex_Short<s16, true, 255> (DataReadU8());}
void LOADERDECL Pos_ReadIndex8_Float3() {Pos_ReadIndex_Float<true, 255> (DataReadU8());}
void LOADERDECL Pos_ReadIndex8_UByte2() {Pos_ReadIndex_Byte<u8, false, 255>(DataReadU8());}
void LOADERDECL Pos_ReadIndex8_Byte2() {Pos_ReadIndex_Byte<s8, false, 255>(DataReadU8());}
void LOADERDECL Pos_ReadIndex8_UShort2() {Pos_ReadIndex_Short<u16, false, 255>(DataReadU8());}
void LOADERDECL Pos_ReadIndex8_Short2() {Pos_ReadIndex_Short<s16, false, 255>(DataReadU8());}
void LOADERDECL Pos_ReadIndex8_Float2() {Pos_ReadIndex_Float<false, 255> (DataReadU8());}
// ==============================================================================
// Index 16
// ==============================================================================
void LOADERDECL Pos_ReadIndex16_UByte3() {Pos_ReadIndex_Byte<u8, true, 65535> (DataReadU16());}
void LOADERDECL Pos_ReadIndex16_Byte3() {Pos_ReadIndex_Byte<s8, true, 65535> (DataReadU16());}
void LOADERDECL Pos_ReadIndex16_UShort3() {Pos_ReadIndex_Short<u16, true, 65535> (DataReadU16());}
void LOADERDECL Pos_ReadIndex16_Short3() {Pos_ReadIndex_Short<s16, true, 65535> (DataReadU16());}
void LOADERDECL Pos_ReadIndex16_Float3() {Pos_ReadIndex_Float<true, 65535> (DataReadU16());}
void LOADERDECL Pos_ReadIndex16_UByte2() {Pos_ReadIndex_Byte<u8, false, 65535>(DataReadU16());}
void LOADERDECL Pos_ReadIndex16_Byte2() {Pos_ReadIndex_Byte<s8, false, 65535>(DataReadU16());}
void LOADERDECL Pos_ReadIndex16_UShort2() {Pos_ReadIndex_Short<u16, false, 65535>(DataReadU16());}
void LOADERDECL Pos_ReadIndex16_Short2() {Pos_ReadIndex_Short<s16, false, 65535>(DataReadU16());}
void LOADERDECL Pos_ReadIndex16_Float2() {Pos_ReadIndex_Float<false, 65535> (DataReadU16());}
#if _M_SSE >= 0x301
void LOADERDECL Pos_ReadIndex8_Float3_SSSE3() {Pos_ReadIndex_Float_SSSE3<true, 255> (DataReadU8());}
void LOADERDECL Pos_ReadIndex8_Float2_SSSE3() {Pos_ReadIndex_Float_SSSE3<false, 255> (DataReadU8());}
void LOADERDECL Pos_ReadIndex16_Float3_SSSE3() {Pos_ReadIndex_Float_SSSE3<true, 65535> (DataReadU16());}
void LOADERDECL Pos_ReadIndex16_Float2_SSSE3() {Pos_ReadIndex_Float_SSSE3<false, 65535> (DataReadU16());}
#endif
static TPipelineFunction tableReadPosition[4][8][2] = {
{
{NULL, NULL,},
@ -263,56 +141,40 @@ static TPipelineFunction tableReadPosition[4][8][2] = {
{NULL, NULL,},
},
{
{Pos_ReadDirect_UByte2, Pos_ReadDirect_UByte3,},
{Pos_ReadDirect_Byte2, Pos_ReadDirect_Byte3,},
{Pos_ReadDirect_UShort2, Pos_ReadDirect_UShort3,},
{Pos_ReadDirect_Short2, Pos_ReadDirect_Short3,},
{Pos_ReadDirect_Float2, Pos_ReadDirect_Float3,},
{Pos_ReadDirect<u8, 2>, Pos_ReadDirect<u8, 3>,},
{Pos_ReadDirect<s8, 2>, Pos_ReadDirect<s8, 3>,},
{Pos_ReadDirect<u16, 2>, Pos_ReadDirect<u16, 3>,},
{Pos_ReadDirect<s16, 2>, Pos_ReadDirect<s16, 3>,},
{Pos_ReadDirect<float, 2>, Pos_ReadDirect<float, 3>,},
},
{
{Pos_ReadIndex8_UByte2, Pos_ReadIndex8_UByte3,},
{Pos_ReadIndex8_Byte2, Pos_ReadIndex8_Byte3,},
{Pos_ReadIndex8_UShort2, Pos_ReadIndex8_UShort3,},
{Pos_ReadIndex8_Short2, Pos_ReadIndex8_Short3,},
{Pos_ReadIndex8_Float2, Pos_ReadIndex8_Float3,},
{Pos_ReadIndex<u8, u8, 2>, Pos_ReadIndex<u8, u8, 3>,},
{Pos_ReadIndex<u8, s8, 2>, Pos_ReadIndex<u8, s8, 3>,},
{Pos_ReadIndex<u8, u16, 2>, Pos_ReadIndex<u8, u16, 3>,},
{Pos_ReadIndex<u8, s16, 2>, Pos_ReadIndex<u8, s16, 3>,},
{Pos_ReadIndex<u8, float, 2>, Pos_ReadIndex<u8, float, 3>,},
},
{
{Pos_ReadIndex16_UByte2, Pos_ReadIndex16_UByte3,},
{Pos_ReadIndex16_Byte2, Pos_ReadIndex16_Byte3,},
{Pos_ReadIndex16_UShort2, Pos_ReadIndex16_UShort3,},
{Pos_ReadIndex16_Short2, Pos_ReadIndex16_Short3,},
{Pos_ReadIndex16_Float2, Pos_ReadIndex16_Float3,},
{Pos_ReadIndex<u16, u8, 2>, Pos_ReadIndex<u16, u8, 3>,},
{Pos_ReadIndex<u16, s8, 2>, Pos_ReadIndex<u16, s8, 3>,},
{Pos_ReadIndex<u16, u16, 2>, Pos_ReadIndex<u16, u16, 3>,},
{Pos_ReadIndex<u16, s16, 2>, Pos_ReadIndex<u16, s16, 3>,},
{Pos_ReadIndex<u16, float, 2>, Pos_ReadIndex<u16, float, 3>,},
},
};
static int tableReadPositionVertexSize[4][8][2] = {
{
{0, 0,},
{0, 0,},
{0, 0,},
{0, 0,},
{0, 0,},
{0, 0,}, {0, 0,}, {0, 0,}, {0, 0,}, {0, 0,},
},
{
{2, 3,},
{2, 3,},
{4, 6,},
{4, 6,},
{8, 12,},
{2, 3,}, {2, 3,}, {4, 6,}, {4, 6,}, {8, 12,},
},
{
{1, 1,},
{1, 1,},
{1, 1,},
{1, 1,},
{1, 1,},
{1, 1,}, {1, 1,}, {1, 1,}, {1, 1,}, {1, 1,},
},
{
{2, 2,},
{2, 2,},
{2, 2,},
{2, 2,},
{2, 2,},
{2, 2,}, {2, 2,}, {2, 2,}, {2, 2,}, {2, 2,},
},
};
@ -322,10 +184,10 @@ void VertexLoader_Position::Init(void) {
#if _M_SSE >= 0x301
if (cpu_info.bSSSE3) {
tableReadPosition[2][4][0] = Pos_ReadIndex8_Float2_SSSE3;
tableReadPosition[2][4][1] = Pos_ReadIndex8_Float3_SSSE3;
tableReadPosition[3][4][0] = Pos_ReadIndex16_Float2_SSSE3;
tableReadPosition[3][4][1] = Pos_ReadIndex16_Float3_SSSE3;
tableReadPosition[2][4][0] = Pos_ReadIndex_Float_SSSE3<u8, false>;
tableReadPosition[2][4][1] = Pos_ReadIndex_Float_SSSE3<u8, true>;
tableReadPosition[3][4][0] = Pos_ReadIndex_Float_SSSE3<u16, false>;
tableReadPosition[3][4][1] = Pos_ReadIndex_Float_SSSE3<u16, true>;
}
#endif

View File

@ -28,8 +28,22 @@
#include <tmmintrin.h>
#endif
#define LOG_TEX1() // PRIM_LOG("tex: %f, ", ((float*)VertexManager::s_pCurBufferPointer)[0]);
#define LOG_TEX2() // PRIM_LOG("tex: %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[0], ((float*)VertexManager::s_pCurBufferPointer)[1]);
template <int N>
void LOG_TEX();
template <>
__forceinline void LOG_TEX<1>()
{
// warning: mapping buffer should be disabled to use this
// PRIM_LOG("tex: %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-1]);
}
template <>
__forceinline void LOG_TEX<2>()
{
// warning: mapping buffer should be disabled to use this
// PRIM_LOG("tex: %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-2], ((float*)VertexManager::s_pCurBufferPointer)[-1]);
}
extern int tcIndex;
extern float tcScale[8];
@ -39,279 +53,54 @@ void LOADERDECL TexCoord_Read_Dummy()
tcIndex++;
}
void LOADERDECL TexCoord_ReadDirect_UByte1()
template <typename T>
float TCScale(T val)
{
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)DataReadU8() * tcScale[tcIndex];
LOG_TEX1();
VertexManager::s_pCurBufferPointer += 4;
tcIndex++;
}
void LOADERDECL TexCoord_ReadDirect_UByte2()
{
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)DataReadU8() * tcScale[tcIndex];
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)DataReadU8() * tcScale[tcIndex];
LOG_TEX2();
VertexManager::s_pCurBufferPointer += 8;
tcIndex++;
return val * tcScale[tcIndex];
}
void LOADERDECL TexCoord_ReadDirect_Byte1()
template <>
float TCScale(float val)
{ return val; }
template <typename T, int N>
void LOADERDECL TexCoord_ReadDirect()
{
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)DataReadU8() * tcScale[tcIndex];
LOG_TEX1();
VertexManager::s_pCurBufferPointer += 4;
tcIndex++;
}
void LOADERDECL TexCoord_ReadDirect_Byte2()
{
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)DataReadU8() * tcScale[tcIndex];
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s8)DataReadU8() * tcScale[tcIndex];
LOG_TEX2();
VertexManager::s_pCurBufferPointer += 8;
tcIndex++;
for (int i = 0; i != N; ++i)
DataWrite(TCScale(DataRead<T>()));
LOG_TEX<N>();
++tcIndex;
}
void LOADERDECL TexCoord_ReadDirect_UShort1()
template <typename I, typename T, int N>
void LOADERDECL TexCoord_ReadIndex()
{
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)DataReadU16() * tcScale[tcIndex];
LOG_TEX1();
VertexManager::s_pCurBufferPointer += 4;
tcIndex++;
}
void LOADERDECL TexCoord_ReadDirect_UShort2()
{
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)DataReadU16() * tcScale[tcIndex];
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)DataReadU16() * tcScale[tcIndex];
LOG_TEX2();
VertexManager::s_pCurBufferPointer += 8;
tcIndex++;
}
static_assert(!std::numeric_limits<I>::is_signed, "Only unsigned I is sane!");
void LOADERDECL TexCoord_ReadDirect_Short1()
{
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)DataReadU16() * tcScale[tcIndex];
LOG_TEX1();
VertexManager::s_pCurBufferPointer += 4;
tcIndex++;
}
void LOADERDECL TexCoord_ReadDirect_Short2()
{
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)DataReadU16() * tcScale[tcIndex];
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s16)DataReadU16() * tcScale[tcIndex];
LOG_TEX2();
VertexManager::s_pCurBufferPointer += 8;
tcIndex++;
}
auto const index = DataRead<I>();
auto const data = reinterpret_cast<const T*>(cached_arraybases[ARRAY_TEXCOORD0 + tcIndex]
+ (index * arraystrides[ARRAY_TEXCOORD0 + tcIndex]));
void LOADERDECL TexCoord_ReadDirect_Float1()
{
((u32*)VertexManager::s_pCurBufferPointer)[0] = DataReadU32();
LOG_TEX1();
VertexManager::s_pCurBufferPointer += 4;
tcIndex++;
}
void LOADERDECL TexCoord_ReadDirect_Float2()
{
((u32*)VertexManager::s_pCurBufferPointer)[0] = DataReadU32();
((u32*)VertexManager::s_pCurBufferPointer)[1] = DataReadU32();
LOG_TEX2();
VertexManager::s_pCurBufferPointer += 8;
tcIndex++;
}
for (int i = 0; i != N; ++i)
DataWrite(TCScale(Common::FromBigEndian(data[i])));
// ==================================================================================
void LOADERDECL TexCoord_ReadIndex8_UByte1()
{
u8 Index = DataReadU8();
const u8 *pData = cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(*pData) * tcScale[tcIndex];
LOG_TEX1();
VertexManager::s_pCurBufferPointer += 4;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex8_UByte2()
{
u8 Index = DataReadU8();
const u8 *pData = cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(u8)(pData[0]) * tcScale[tcIndex];
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(u8)(pData[1]) * tcScale[tcIndex];
LOG_TEX2();
VertexManager::s_pCurBufferPointer += 8;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex8_Byte1()
{
u8 Index = DataReadU8();
const u8 *pData = cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)(*pData) * tcScale[tcIndex];
LOG_TEX1();
VertexManager::s_pCurBufferPointer += 4;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex8_Byte2()
{
u8 Index = DataReadU8();
const u8 *pData = cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)(pData[0]) * tcScale[tcIndex];
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s8)(pData[1]) * tcScale[tcIndex];
LOG_TEX2();
VertexManager::s_pCurBufferPointer += 8;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex8_UShort1()
{
u8 Index = DataReadU8();
const u16 *pData = (const u16 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(u16)Common::swap16(*pData) * tcScale[tcIndex];
LOG_TEX1();
VertexManager::s_pCurBufferPointer += 4;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex8_UShort2()
{
u8 Index = DataReadU8();
const u16 *pData = (const u16 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(u16)Common::swap16(pData[0]) * tcScale[tcIndex];
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(u16)Common::swap16(pData[1]) * tcScale[tcIndex];
LOG_TEX2();
VertexManager::s_pCurBufferPointer += 8;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex8_Short1()
{
u8 Index = DataReadU8();
const u16 *pData = (const u16 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)Common::swap16(pData[0]) * tcScale[tcIndex];
LOG_TEX1();
VertexManager::s_pCurBufferPointer += 4;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex8_Short2()
{
u8 Index = DataReadU8();
const u16 *pData = (const u16 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)Common::swap16(pData[0]) * tcScale[tcIndex];
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s16)Common::swap16(pData[1]) * tcScale[tcIndex];
LOG_TEX2();
VertexManager::s_pCurBufferPointer += 8;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex8_Float1()
{
u16 Index = DataReadU8();
const u32 *pData = (const u32 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]);
LOG_TEX1();
VertexManager::s_pCurBufferPointer += 4;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex8_Float2()
{
u16 Index = DataReadU8();
const u32 *pData = (const u32 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]);
((u32*)VertexManager::s_pCurBufferPointer)[1] = Common::swap32(pData[1]);
LOG_TEX2();
VertexManager::s_pCurBufferPointer += 8;
tcIndex++;
}
// ==================================================================================
void LOADERDECL TexCoord_ReadIndex16_UByte1()
{
u16 Index = DataReadU16();
const u8 *pData = cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(u8)(pData[0]) * tcScale[tcIndex];
LOG_TEX1();
VertexManager::s_pCurBufferPointer += 4;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex16_UByte2()
{
u16 Index = DataReadU16();
const u8 *pData = cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(u8)(pData[0]) * tcScale[tcIndex];
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(u8)(pData[1]) * tcScale[tcIndex];
LOG_TEX2();
VertexManager::s_pCurBufferPointer += 8;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex16_Byte1()
{
u16 Index = DataReadU16();
const u8 *pData = cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)(pData[0]) * tcScale[tcIndex];
LOG_TEX1();
VertexManager::s_pCurBufferPointer += 4;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex16_Byte2()
{
u16 Index = DataReadU16();
const u8 *pData = cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]);
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)(pData[0]) * tcScale[tcIndex];
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s8)(pData[1]) * tcScale[tcIndex];
LOG_TEX2();
VertexManager::s_pCurBufferPointer += 8;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex16_UShort1()
{
u16 Index = DataReadU16();
const u16* pData = (const u16 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(u16)Common::swap16(pData[0]) * tcScale[tcIndex];
LOG_TEX1();
VertexManager::s_pCurBufferPointer += 4;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex16_UShort2()
{
u16 Index = DataReadU16();
const u16* pData = (const u16 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(u16)Common::swap16(pData[0]) * tcScale[tcIndex];
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(u16)Common::swap16(pData[1]) * tcScale[tcIndex];
LOG_TEX2();
VertexManager::s_pCurBufferPointer += 8;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex16_Short1()
{
u16 Index = DataReadU16();
const u16 *pData = (const u16 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)Common::swap16(*pData) * tcScale[tcIndex];
LOG_TEX1();
VertexManager::s_pCurBufferPointer += 4;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex16_Short2()
{
// Heavy in ZWW
u16 Index = DataReadU16();
const u16 *pData = (const u16 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)Common::swap16(pData[0]) * tcScale[tcIndex];
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s16)Common::swap16(pData[1]) * tcScale[tcIndex];
LOG_TEX2();
VertexManager::s_pCurBufferPointer += 8;
tcIndex++;
LOG_TEX<N>();
++tcIndex;
}
#if _M_SSE >= 0x401
static const __m128i kMaskSwap16_2 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0xFFFFFFFFL, 0x02030001L);
void LOADERDECL TexCoord_ReadIndex16_Short2_SSE4()
template <typename I>
void LOADERDECL TexCoord_ReadIndex_Short2_SSE4()
{
static_assert(!std::numeric_limits<I>::is_signed, "Only unsigned I is sane!");
// Heavy in ZWW
u16 Index = DataReadU16();
const s32 *pData = (const s32*)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
auto const index = DataRead<I>();
const s32 *pData = (const s32*)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
const __m128i a = _mm_cvtsi32_si128(*pData);
const __m128i b = _mm_shuffle_epi8(a, kMaskSwap16_2);
const __m128i c = _mm_cvtepi16_epi32(b);
@ -319,47 +108,27 @@ void LOADERDECL TexCoord_ReadIndex16_Short2_SSE4()
const __m128 e = _mm_load1_ps(&tcScale[tcIndex]);
const __m128 f = _mm_mul_ps(d, e);
_mm_storeu_ps((float*)VertexManager::s_pCurBufferPointer, f);
LOG_TEX2();
VertexManager::s_pCurBufferPointer += 8;
VertexManager::s_pCurBufferPointer += sizeof(float) * 2;
LOG_TEX<2>();
tcIndex++;
}
#endif
void LOADERDECL TexCoord_ReadIndex16_Float1()
{
u16 Index = DataReadU16();
const u32 *pData = (const u32 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]);
LOG_TEX1();
VertexManager::s_pCurBufferPointer += 4;
tcIndex++;
}
void LOADERDECL TexCoord_ReadIndex16_Float2()
{
u16 Index = DataReadU16();
const u32 *pData = (const u32 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]);
((u32*)VertexManager::s_pCurBufferPointer)[1] = Common::swap32(pData[1]);
LOG_TEX2();
VertexManager::s_pCurBufferPointer += 8;
tcIndex++;
}
#if _M_SSE >= 0x301
static const __m128i kMaskSwap32 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x04050607L, 0x00010203L);
void LOADERDECL TexCoord_ReadIndex16_Float2_SSSE3()
template <typename I>
void LOADERDECL TexCoord_ReadIndex_Float2_SSSE3()
{
u16 Index = DataReadU16();
const u32 *pData = (const u32 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (Index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
static_assert(!std::numeric_limits<I>::is_signed, "Only unsigned I is sane!");
auto const index = DataRead<I>();
const u32 *pData = (const u32 *)(cached_arraybases[ARRAY_TEXCOORD0+tcIndex] + (index * arraystrides[ARRAY_TEXCOORD0+tcIndex]));
GC_ALIGNED128(const __m128i a = _mm_loadl_epi64((__m128i*)pData));
GC_ALIGNED128(const __m128i b = _mm_shuffle_epi8(a, kMaskSwap32));
u8* p = VertexManager::s_pCurBufferPointer;
_mm_storel_epi64((__m128i*)p, b);
LOG_TEX2();
p += 8;
VertexManager::s_pCurBufferPointer = p;
_mm_storel_epi64((__m128i*)VertexManager::s_pCurBufferPointer, b);
VertexManager::s_pCurBufferPointer += sizeof(float) * 2;
LOG_TEX<2>();
tcIndex++;
}
#endif
@ -373,56 +142,40 @@ static TPipelineFunction tableReadTexCoord[4][8][2] = {
{NULL, NULL,},
},
{
{TexCoord_ReadDirect_UByte1, TexCoord_ReadDirect_UByte2,},
{TexCoord_ReadDirect_Byte1, TexCoord_ReadDirect_Byte2,},
{TexCoord_ReadDirect_UShort1, TexCoord_ReadDirect_UShort2,},
{TexCoord_ReadDirect_Short1, TexCoord_ReadDirect_Short2,},
{TexCoord_ReadDirect_Float1, TexCoord_ReadDirect_Float2,},
{TexCoord_ReadDirect<u8, 1>, TexCoord_ReadDirect<u8, 2>,},
{TexCoord_ReadDirect<s8, 1>, TexCoord_ReadDirect<s8, 2>,},
{TexCoord_ReadDirect<u16, 1>, TexCoord_ReadDirect<u16, 2>,},
{TexCoord_ReadDirect<s16, 1>, TexCoord_ReadDirect<s16, 2>,},
{TexCoord_ReadDirect<float, 1>, TexCoord_ReadDirect<float, 2>,},
},
{
{TexCoord_ReadIndex8_UByte1, TexCoord_ReadIndex8_UByte2,},
{TexCoord_ReadIndex8_Byte1, TexCoord_ReadIndex8_Byte2,},
{TexCoord_ReadIndex8_UShort1, TexCoord_ReadIndex8_UShort2,},
{TexCoord_ReadIndex8_Short1, TexCoord_ReadIndex8_Short2,},
{TexCoord_ReadIndex8_Float1, TexCoord_ReadIndex8_Float2,},
{TexCoord_ReadIndex<u8, u8, 1>, TexCoord_ReadIndex<u8, u8, 2>,},
{TexCoord_ReadIndex<u8, s8, 1>, TexCoord_ReadIndex<u8, s8, 2>,},
{TexCoord_ReadIndex<u8, u16, 1>, TexCoord_ReadIndex<u8, u16, 2>,},
{TexCoord_ReadIndex<u8, s16, 1>, TexCoord_ReadIndex<u8, s16, 2>,},
{TexCoord_ReadIndex<u8, float, 1>, TexCoord_ReadIndex<u8, float, 2>,},
},
{
{TexCoord_ReadIndex16_UByte1, TexCoord_ReadIndex16_UByte2,},
{TexCoord_ReadIndex16_Byte1, TexCoord_ReadIndex16_Byte2,},
{TexCoord_ReadIndex16_UShort1, TexCoord_ReadIndex16_UShort2,},
{TexCoord_ReadIndex16_Short1, TexCoord_ReadIndex16_Short2,},
{TexCoord_ReadIndex16_Float1, TexCoord_ReadIndex16_Float2,},
{TexCoord_ReadIndex<u16, u8, 1>, TexCoord_ReadIndex<u16, u8, 2>,},
{TexCoord_ReadIndex<u16, s8, 1>, TexCoord_ReadIndex<u16, s8, 2>,},
{TexCoord_ReadIndex<u16, u16, 1>, TexCoord_ReadIndex<u16, u16, 2>,},
{TexCoord_ReadIndex<u16, s16, 1>, TexCoord_ReadIndex<u16, s16, 2>,},
{TexCoord_ReadIndex<u16, float, 1>, TexCoord_ReadIndex<u16, float, 2>,},
},
};
static int tableReadTexCoordVertexSize[4][8][2] = {
{
{0, 0,},
{0, 0,},
{0, 0,},
{0, 0,},
{0, 0,},
{0, 0,}, {0, 0,}, {0, 0,}, {0, 0,}, {0, 0,},
},
{
{1, 2,},
{1, 2,},
{2, 4,},
{2, 4,},
{4, 8,},
{1, 2,}, {1, 2,}, {2, 4,}, {2, 4,}, {4, 8,},
},
{
{1, 1,},
{1, 1,},
{1, 1,},
{1, 1,},
{1, 1,},
{1, 1,}, {1, 1,}, {1, 1,}, {1, 1,}, {1, 1,},
},
{
{2, 2,},
{2, 2,},
{2, 2,},
{2, 2,},
{2, 2,},
{2, 2,}, {2, 2,}, {2, 2,}, {2, 2,}, {2, 2,},
},
};
@ -430,16 +183,20 @@ void VertexLoader_TextCoord::Init(void) {
#if _M_SSE >= 0x301
if (cpu_info.bSSSE3) {
tableReadTexCoord[3][4][1] = TexCoord_ReadIndex16_Float2_SSSE3;
if (cpu_info.bSSSE3)
{
tableReadTexCoord[2][4][1] = TexCoord_ReadIndex_Float2_SSSE3<u8>;
tableReadTexCoord[3][4][1] = TexCoord_ReadIndex_Float2_SSSE3<u16>;
}
#endif
#if _M_SSE >= 0x401
if (cpu_info.bSSE4_1) {
tableReadTexCoord[3][3][1] = TexCoord_ReadIndex16_Short2_SSE4;
if (cpu_info.bSSE4_1)
{
tableReadTexCoord[2][3][1] = TexCoord_ReadIndex_Short2_SSE4<u8>;
tableReadTexCoord[3][3][1] = TexCoord_ReadIndex_Short2_SSE4<u16>;
}
#endif

View File

@ -12,171 +12,120 @@
#include "BPStructs.h"
#include "VertexManagerBase.h"
#include "MainBase.h"
#include "VideoConfig.h"
VertexManager *g_vertex_manager;
u8 *VertexManager::s_pCurBufferPointer;
u8 *VertexManager::s_pBaseBufferPointer;
u8 *VertexManager::LocalVBuffer;
u16 *VertexManager::TIBuffer;
u16 *VertexManager::LIBuffer;
u16 *VertexManager::PIBuffer;
bool VertexManager::Flushed;
u8 *VertexManager::s_pEndBufferPointer;
VertexManager::VertexManager()
{
Flushed = false;
LocalVBuffer.resize(MAXVBUFFERSIZE);
s_pCurBufferPointer = s_pBaseBufferPointer = &LocalVBuffer[0];
s_pEndBufferPointer = s_pBaseBufferPointer + LocalVBuffer.size();
LocalVBuffer = new u8[MAXVBUFFERSIZE];
s_pCurBufferPointer = s_pBaseBufferPointer = LocalVBuffer;
TIBuffer.resize(MAXIBUFFERSIZE);
LIBuffer.resize(MAXIBUFFERSIZE);
PIBuffer.resize(MAXIBUFFERSIZE);
TIBuffer = new u16[MAXIBUFFERSIZE];
LIBuffer = new u16[MAXIBUFFERSIZE];
PIBuffer = new u16[MAXIBUFFERSIZE];
IndexGenerator::Start(TIBuffer, LIBuffer, PIBuffer);
}
void VertexManager::ResetBuffer()
{
s_pCurBufferPointer = LocalVBuffer;
}
VertexManager::~VertexManager()
{
delete[] LocalVBuffer;
delete[] TIBuffer;
delete[] LIBuffer;
delete[] PIBuffer;
// TODO: necessary??
ResetBuffer();
}
void VertexManager::AddIndices(int primitive, int numVertices)
{
//switch (primitive)
//{
//case GX_DRAW_QUADS: IndexGenerator::AddQuads(numVertices); break;
//case GX_DRAW_TRIANGLES: IndexGenerator::AddList(numVertices); break;
//case GX_DRAW_TRIANGLE_STRIP: IndexGenerator::AddStrip(numVertices); break;
//case GX_DRAW_TRIANGLE_FAN: IndexGenerator::AddFan(numVertices); break;
//case GX_DRAW_LINES: IndexGenerator::AddLineList(numVertices); break;
//case GX_DRAW_LINE_STRIP: IndexGenerator::AddLineStrip(numVertices); break;
//case GX_DRAW_POINTS: IndexGenerator::AddPoints(numVertices); break;
//}
VertexManager::~VertexManager()
{}
static void (*const primitive_table[])(int) =
void VertexManager::ResetBuffer()
{
s_pCurBufferPointer = s_pBaseBufferPointer;
IndexGenerator::Start(GetTriangleIndexBuffer(), GetLineIndexBuffer(), GetPointIndexBuffer());
}
u32 VertexManager::GetRemainingSize()
{
return (u32)(s_pEndBufferPointer - s_pCurBufferPointer);
}
void VertexManager::PrepareForAdditionalData(int primitive, u32 count, u32 stride)
{
u32 const needed_vertex_bytes = count * stride;
if (needed_vertex_bytes > GetRemainingSize() || count > GetRemainingIndices(primitive))
{
IndexGenerator::AddQuads,
NULL,
IndexGenerator::AddList,
IndexGenerator::AddStrip,
IndexGenerator::AddFan,
IndexGenerator::AddLineList,
IndexGenerator::AddLineStrip,
IndexGenerator::AddPoints,
};
Flush();
primitive_table[primitive](numVertices);
if (needed_vertex_bytes > GetRemainingSize())
ERROR_LOG(VIDEO, "VertexManager: Buffer not large enough for all vertices! "
"Increase MAXVBUFFERSIZE or we need primitive breaking afterall.");
if (count > GetRemainingIndices(primitive))
ERROR_LOG(VIDEO, "VertexManager: Buffer not large enough for all indices! "
"Increase MAXIBUFFERSIZE or we need primitive breaking afterall.");
}
}
int VertexManager::GetRemainingSize()
bool VertexManager::IsFlushed() const
{
return MAXVBUFFERSIZE - (int)(s_pCurBufferPointer - LocalVBuffer);
return s_pBaseBufferPointer == s_pCurBufferPointer;
}
int VertexManager::GetRemainingVertices(int primitive)
u32 VertexManager::GetRemainingIndices(int primitive)
{
switch (primitive)
{
case GX_DRAW_QUADS:
return (MAXIBUFFERSIZE - IndexGenerator::GetTriangleindexLen()) / 6 * 4;
case GX_DRAW_TRIANGLES:
return (MAXIBUFFERSIZE - IndexGenerator::GetTriangleindexLen());
case GX_DRAW_TRIANGLE_STRIP:
return (MAXIBUFFERSIZE - IndexGenerator::GetTriangleindexLen()) / 3 + 2;
case GX_DRAW_TRIANGLE_FAN:
return (MAXIBUFFERSIZE - IndexGenerator::GetTriangleindexLen()) / 3;
break;
return (MAXIBUFFERSIZE - IndexGenerator::GetTriangleindexLen()) / 3 + 2;
case GX_DRAW_LINES:
return (MAXIBUFFERSIZE - IndexGenerator::GetLineindexLen());
case GX_DRAW_LINE_STRIP:
return (MAXIBUFFERSIZE - IndexGenerator::GetLineindexLen()) / 2;
break;
return (MAXIBUFFERSIZE - IndexGenerator::GetLineindexLen()) / 2 + 1;
case GX_DRAW_POINTS:
return (MAXIBUFFERSIZE - IndexGenerator::GetPointindexLen());
break;
default:
return 0;
break;
}
}
void VertexManager::AddVertices(int primitive, int numVertices)
void VertexManager::AddVertices(int primitive, u32 numVertices)
{
if (numVertices <= 0)
return;
switch (primitive)
{
case GX_DRAW_QUADS:
case GX_DRAW_TRIANGLES:
case GX_DRAW_TRIANGLE_STRIP:
case GX_DRAW_TRIANGLE_FAN:
if (MAXIBUFFERSIZE - IndexGenerator::GetTriangleindexLen() < 3 * numVertices)
Flush();
break;
case GX_DRAW_LINES:
case GX_DRAW_LINE_STRIP:
if (MAXIBUFFERSIZE - IndexGenerator::GetLineindexLen() < 2 * numVertices)
Flush();
break;
case GX_DRAW_POINTS:
if (MAXIBUFFERSIZE - IndexGenerator::GetPointindexLen() < numVertices)
Flush();
break;
default:
return;
break;
}
if (Flushed)
{
IndexGenerator::Start(TIBuffer, LIBuffer, PIBuffer);
Flushed = false;
}
ADDSTAT(stats.thisFrame.numPrims, numVertices);
INCSTAT(stats.thisFrame.numPrimitiveJoins);
AddIndices(primitive, numVertices);
IndexGenerator::AddIndices(primitive, numVertices);
}
void VertexManager::Flush()
{
if (g_vertex_manager->IsFlushed())
return;
// loading a state will invalidate BP, so check for it
g_video_backend->CheckInvalidState();
VideoFifo_CheckEFBAccess();
g_vertex_manager->vFlush();
g_vertex_manager->ResetBuffer();
}
// TODO: need to merge more stuff into VideoCommon to use this
#if (0)
void VertexManager::Flush()
{
if (LocalVBuffer == s_pCurBufferPointer || Flushed)
return;
Flushed = true;
VideoFifo_CheckEFBAccess();
#if defined(_DEBUG) || defined(DEBUGFAST)
PRIM_LOG("frame%d:\n texgen=%d, numchan=%d, dualtex=%d, ztex=%d, cole=%d, alpe=%d, ze=%d", g_ActiveConfig.iSaveTargetId, xfregs.numTexGens,
xfregs.nNumChans, (int)xfregs.bEnableDualTexTransform, bpmem.ztex2.op,
@ -249,9 +198,9 @@ void VertexManager::Flush()
// finally bind
if (false == PixelShaderCache::SetShader(false, g_nativeVertexFmt->m_components))
goto shader_fail;
return;
if (false == VertexShaderCache::SetShader(g_nativeVertexFmt->m_components))
goto shader_fail;
return;
const int stride = g_nativeVertexFmt->GetVertexStride();
//if (g_nativeVertexFmt)
@ -265,7 +214,7 @@ void VertexManager::Flush()
if (false == g_ActiveConfig.bDstAlphaPass && bpmem.dstalpha.enable && bpmem.blendmode.alphaupdate)
{
if (false == PixelShaderCache::SetShader(true, g_nativeVertexFmt->m_components))
goto shader_fail;
return;
g_vertex_manager->Draw(stride, true);
}
@ -301,9 +250,6 @@ void VertexManager::Flush()
}
#endif
++g_Config.iSaveTargetId;
shader_fail:
ResetBuffer();
}
#endif
@ -314,12 +260,16 @@ void VertexManager::DoState(PointerWrap& p)
void VertexManager::DoStateShared(PointerWrap& p)
{
p.DoPointer(s_pCurBufferPointer, LocalVBuffer);
p.DoArray(LocalVBuffer, MAXVBUFFERSIZE);
p.DoArray(TIBuffer, MAXIBUFFERSIZE);
p.DoArray(LIBuffer, MAXIBUFFERSIZE);
p.DoArray(PIBuffer, MAXIBUFFERSIZE);
// It seems we half-assume to be flushed here
// We update s_pCurBufferPointer yet don't worry about IndexGenerator's outdated pointers
// and maybe other things are overlooked
if (p.GetMode() == PointerWrap::MODE_READ)
Flushed = false;
p.Do(LocalVBuffer);
p.Do(TIBuffer);
p.Do(LIBuffer);
p.Do(PIBuffer);
s_pBaseBufferPointer = &LocalVBuffer[0];
s_pEndBufferPointer = s_pBaseBufferPointer + LocalVBuffer.size();
p.DoPointer(s_pCurBufferPointer, s_pBaseBufferPointer);
}

View File

@ -2,72 +2,70 @@
#ifndef _VERTEXMANAGERBASE_H
#define _VERTEXMANAGERBASE_H
#include <vector>
class NativeVertexFormat;
class PointerWrap;
class VertexManager
{
private:
// What are the actual values?
static const u32 SMALLEST_POSSIBLE_VERTEX = 1;
static const u32 LARGEST_POSSIBLE_VERTEX = 188;
static const u32 MAX_PRIMITIVES_PER_COMMAND = (u16)-1;
public:
static const u32 MAXVBUFFERSIZE = MAX_PRIMITIVES_PER_COMMAND * LARGEST_POSSIBLE_VERTEX;
enum
{
// values from OGL backend
//MAXVBUFFERSIZE = 0x1FFFF,
//MAXIBUFFERSIZE = 0xFFFF,
// values from DX9 backend
//MAXVBUFFERSIZE = 0x50000,
//MAXIBUFFERSIZE = 0xFFFF,
// values from DX11 backend
MAXVBUFFERSIZE = 0x50000,
MAXIBUFFERSIZE = 0xFFFF,
};
// We may convert triangle-fans to triangle-lists, almost 3x as many indices.
static const u32 MAXIBUFFERSIZE = MAX_PRIMITIVES_PER_COMMAND * 3;
VertexManager();
virtual ~VertexManager(); // needs to be virtual for DX11's dtor
// needs to be virtual for DX11's dtor
virtual ~VertexManager();
static void AddVertices(int _primitive, int _numVertices);
static void AddVertices(int _primitive, u32 _numVertices);
// TODO: protected?
static u8 *s_pCurBufferPointer;
static u8 *s_pBaseBufferPointer;
static u8 *s_pEndBufferPointer;
static int GetRemainingSize();
static int GetRemainingVertices(int primitive);
static u32 GetRemainingSize();
static void PrepareForAdditionalData(int primitive, u32 count, u32 stride);
static u32 GetRemainingIndices(int primitive);
static void Flush();
virtual ::NativeVertexFormat* CreateNativeVertexFormat() = 0;
static u16* GetTriangleIndexBuffer() { return TIBuffer; }
static u16* GetLineIndexBuffer() { return LIBuffer; }
static u16* GetPointIndexBuffer() { return PIBuffer; }
static u8* GetVertexBuffer() { return LocalVBuffer; }
static void DoState(PointerWrap& p);
virtual void CreateDeviceObjects(){};
virtual void DestroyDeviceObjects(){};
protected:
// TODO: make private after Flush() is merged
static void ResetBuffer();
static u8 *LocalVBuffer;
static u16 *TIBuffer;
static u16 *LIBuffer;
static u16 *PIBuffer;
static bool Flushed;
u16* GetTriangleIndexBuffer() { return &TIBuffer[0]; }
u16* GetLineIndexBuffer() { return &LIBuffer[0]; }
u16* GetPointIndexBuffer() { return &PIBuffer[0]; }
u8* GetVertexBuffer() { return &s_pBaseBufferPointer[0]; }
virtual void vDoState(PointerWrap& p) { DoStateShared(p); }
void DoStateShared(PointerWrap& p);
private:
static void AddIndices(int primitive, int numVertices);
bool IsFlushed() const;
void ResetBuffer();
//virtual void Draw(u32 stride, bool alphapass) = 0;
// temp
virtual void vFlush() = 0;
std::vector<u8> LocalVBuffer;
std::vector<u16> TIBuffer;
std::vector<u16> LIBuffer;
std::vector<u16> PIBuffer;
};
extern VertexManager *g_vertex_manager;

View File

@ -90,8 +90,8 @@ struct TargetRectangle : public MathUtil::Rectangle<int>
#define PRIM_LOG(...) DEBUG_LOG(VIDEO, ##__VA_ARGS__)
#endif
// #define LOG_VTX() DEBUG_LOG(VIDEO, "vtx: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[0], ((float*)VertexManager::s_pCurBufferPointer)[1], ((float*)VertexManager::s_pCurBufferPointer)[2]);
// warning: mapping buffer should be disabled to use this
// #define LOG_VTX() DEBUG_LOG(VIDEO, "vtx: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-3], ((float*)VertexManager::s_pCurBufferPointer)[-2], ((float*)VertexManager::s_pCurBufferPointer)[-1]);
#define LOG_VTX()

View File

@ -550,8 +550,7 @@ void CompileAndRunDisplayList(u32 address, u32 size, CachedDisplayList *dl)
cmd_byte & GX_VAT_MASK, // Vertex loader index (0 - 7)
(cmd_byte & GX_PRIMITIVE_MASK) >> GX_PRIMITIVE_SHIFT,
numVertices);
u8* EndAddress = VertexManager::s_pCurBufferPointer;
u32 Vdatasize = (u32)(EndAddress - StartAddress);
u32 Vdatasize = (u32)(VertexManager::s_pCurBufferPointer - StartAddress);
if (Vdatasize > 0)
{
// Compile

View File

@ -104,7 +104,7 @@ void VertexManager::LoadBuffers()
{
D3D11_MAPPED_SUBRESOURCE map;
UINT vSize = UINT(s_pCurBufferPointer - LocalVBuffer);
UINT vSize = UINT(s_pCurBufferPointer - s_pBaseBufferPointer);
D3D11_MAP MapType = D3D11_MAP_WRITE_NO_OVERWRITE;
if (m_vertexBufferCursor + vSize >= VBUFFER_SIZE)
{
@ -116,7 +116,7 @@ void VertexManager::LoadBuffers()
D3D::context->Map(m_vertexBuffers[m_activeVertexBuffer], 0, MapType, 0, &map);
memcpy((u8*)map.pData + m_vertexBufferCursor, LocalVBuffer, vSize);
memcpy((u8*)map.pData + m_vertexBufferCursor, s_pBaseBufferPointer, vSize);
D3D::context->Unmap(m_vertexBuffers[m_activeVertexBuffer], 0);
m_vertexDrawOffset = m_vertexBufferCursor;
m_vertexBufferCursor += vSize;
@ -136,9 +136,9 @@ void VertexManager::LoadBuffers()
m_triangleDrawIndex = m_indexBufferCursor;
m_lineDrawIndex = m_triangleDrawIndex + IndexGenerator::GetTriangleindexLen();
m_pointDrawIndex = m_lineDrawIndex + IndexGenerator::GetLineindexLen();
memcpy((u16*)map.pData + m_triangleDrawIndex, TIBuffer, sizeof(u16) * IndexGenerator::GetTriangleindexLen());
memcpy((u16*)map.pData + m_lineDrawIndex, LIBuffer, sizeof(u16) * IndexGenerator::GetLineindexLen());
memcpy((u16*)map.pData + m_pointDrawIndex, PIBuffer, sizeof(u16) * IndexGenerator::GetPointindexLen());
memcpy((u16*)map.pData + m_triangleDrawIndex, GetTriangleIndexBuffer(), sizeof(u16) * IndexGenerator::GetTriangleindexLen());
memcpy((u16*)map.pData + m_lineDrawIndex, GetLineIndexBuffer(), sizeof(u16) * IndexGenerator::GetLineindexLen());
memcpy((u16*)map.pData + m_pointDrawIndex, GetPointIndexBuffer(), sizeof(u16) * IndexGenerator::GetPointindexLen());
D3D::context->Unmap(m_indexBuffers[m_activeIndexBuffer], 0);
m_indexBufferCursor += iCount;
}
@ -208,13 +208,9 @@ void VertexManager::Draw(UINT stride)
if (IndexGenerator::GetNumLines() > 0 || IndexGenerator::GetNumPoints() > 0)
((DX11::Renderer*)g_renderer)->RestoreCull();
}
void VertexManager::vFlush()
{
if (LocalVBuffer == s_pCurBufferPointer) return;
if (Flushed) return;
Flushed=true;
VideoFifo_CheckEFBAccess();
u32 usedtextures = 0;
for (u32 i = 0; i < (u32)bpmem.genMode.numtevstages + 1; ++i)
if (bpmem.tevorders[i / 2].getEnable(i & 1))
@ -262,12 +258,12 @@ void VertexManager::vFlush()
g_nativeVertexFmt->m_components))
{
GFX_DEBUGGER_PAUSE_LOG_AT(NEXT_ERROR,true,{printf("Fail to set pixel shader\n");});
goto shader_fail;
return;
}
if (!VertexShaderCache::SetShader(g_nativeVertexFmt->m_components))
{
GFX_DEBUGGER_PAUSE_LOG_AT(NEXT_ERROR,true,{printf("Fail to set pixel shader\n");});
goto shader_fail;
return;
}
LoadBuffers();
unsigned int stride = g_nativeVertexFmt->GetVertexStride();
@ -281,9 +277,6 @@ void VertexManager::vFlush()
GFX_DEBUGGER_PAUSE_AT(NEXT_FLUSH, true);
g_renderer->RestoreState();
shader_fail:
ResetBuffer();
}
} // namespace

View File

@ -173,7 +173,7 @@ void VertexManager::PrepareVBuffers(int stride)
DestroyDeviceObjects();
return;
}
memcpy(pVertices, LocalVBuffer, datasize);
memcpy(pVertices, s_pBaseBufferPointer, datasize);
VBuffers[CurrentVBuffer]->Unlock();
LockMode = D3DLOCK_NOOVERWRITE;
@ -192,17 +192,17 @@ void VertexManager::PrepareVBuffers(int stride)
}
if(TdataSize)
{
memcpy(pIndices, TIBuffer, TdataSize * sizeof(u16));
memcpy(pIndices, GetTriangleIndexBuffer(), TdataSize * sizeof(u16));
pIndices += TdataSize;
}
if(LDataSize)
{
memcpy(pIndices, LIBuffer, LDataSize * sizeof(u16));
memcpy(pIndices, GetLineIndexBuffer(), LDataSize * sizeof(u16));
pIndices += LDataSize;
}
if(PDataSize)
{
memcpy(pIndices, PIBuffer, PDataSize * sizeof(u16));
memcpy(pIndices, GetPointIndexBuffer(), PDataSize * sizeof(u16));
}
IBuffers[CurrentIBuffer]->Unlock();
D3D::dev->SetStreamSource( 0, VBuffers[CurrentVBuffer], CurrentVBufferIndex, stride);
@ -266,9 +266,9 @@ void VertexManager::DrawVA(int stride)
if (FAILED(D3D::dev->DrawIndexedPrimitiveUP(
D3DPT_TRIANGLELIST,
0, IndexGenerator::GetNumVerts(), IndexGenerator::GetNumTriangles(),
TIBuffer,
GetTriangleIndexBuffer(),
D3DFMT_INDEX16,
LocalVBuffer,
s_pBaseBufferPointer,
stride)))
{
DumpBadShaders();
@ -280,9 +280,9 @@ void VertexManager::DrawVA(int stride)
if (FAILED(D3D::dev->DrawIndexedPrimitiveUP(
D3DPT_LINELIST,
0, IndexGenerator::GetNumVerts(), IndexGenerator::GetNumLines(),
LIBuffer,
GetLineIndexBuffer(),
D3DFMT_INDEX16,
LocalVBuffer,
s_pBaseBufferPointer,
stride)))
{
DumpBadShaders();
@ -294,9 +294,9 @@ void VertexManager::DrawVA(int stride)
if (FAILED(D3D::dev->DrawIndexedPrimitiveUP(
D3DPT_POINTLIST,
0, IndexGenerator::GetNumVerts(), IndexGenerator::GetNumPoints(),
PIBuffer,
GetPointIndexBuffer(),
D3DFMT_INDEX16,
LocalVBuffer,
s_pBaseBufferPointer,
stride)))
{
DumpBadShaders();
@ -307,11 +307,6 @@ void VertexManager::DrawVA(int stride)
void VertexManager::vFlush()
{
if (LocalVBuffer == s_pCurBufferPointer) return;
if (Flushed) return;
Flushed = true;
VideoFifo_CheckEFBAccess();
u32 usedtextures = 0;
for (u32 i = 0; i < (u32)bpmem.genMode.numtevstages + 1; ++i)
if (bpmem.tevorders[i / 2].getEnable(i & 1))
@ -388,7 +383,6 @@ shader_fail:
CurrentIBufferIndex += IndexGenerator::GetTriangleindexLen() + IndexGenerator::GetLineindexLen() + IndexGenerator::GetPointindexLen();
CurrentVBufferIndex += IndexGenerator::GetNumVerts() * stride;
}
ResetBuffer();
}
}

View File

@ -84,27 +84,23 @@ void VertexManager::Draw()
{
if (IndexGenerator::GetNumTriangles() > 0)
{
glDrawElements(GL_TRIANGLES, IndexGenerator::GetTriangleindexLen(), GL_UNSIGNED_SHORT, TIBuffer);
glDrawElements(GL_TRIANGLES, IndexGenerator::GetTriangleindexLen(), GL_UNSIGNED_SHORT, GetTriangleIndexBuffer());
INCSTAT(stats.thisFrame.numIndexedDrawCalls);
}
if (IndexGenerator::GetNumLines() > 0)
{
glDrawElements(GL_LINES, IndexGenerator::GetLineindexLen(), GL_UNSIGNED_SHORT, LIBuffer);
glDrawElements(GL_LINES, IndexGenerator::GetLineindexLen(), GL_UNSIGNED_SHORT, GetLineIndexBuffer());
INCSTAT(stats.thisFrame.numIndexedDrawCalls);
}
if (IndexGenerator::GetNumPoints() > 0)
{
glDrawElements(GL_POINTS, IndexGenerator::GetPointindexLen(), GL_UNSIGNED_SHORT, PIBuffer);
glDrawElements(GL_POINTS, IndexGenerator::GetPointindexLen(), GL_UNSIGNED_SHORT, GetPointIndexBuffer());
INCSTAT(stats.thisFrame.numIndexedDrawCalls);
}
}
void VertexManager::vFlush()
{
if (LocalVBuffer == s_pCurBufferPointer) return;
if (Flushed) return;
Flushed=true;
VideoFifo_CheckEFBAccess();
#if defined(_DEBUG) || defined(DEBUGFAST)
PRIM_LOG("frame%d:\n texgen=%d, numchan=%d, dualtex=%d, ztex=%d, cole=%d, alpe=%d, ze=%d", g_ActiveConfig.iSaveTargetId, xfregs.numTexGen.numTexGens,
xfregs.numChan.numColorChans, xfregs.dualTexTrans.enabled, bpmem.ztex2.op,
@ -136,7 +132,7 @@ void VertexManager::vFlush()
(void)GL_REPORT_ERROR();
//glBindBuffer(GL_ARRAY_BUFFER, s_vboBuffers[s_nCurVBOIndex]);
//glBufferData(GL_ARRAY_BUFFER, s_pCurBufferPointer - LocalVBuffer, LocalVBuffer, GL_STREAM_DRAW);
//glBufferData(GL_ARRAY_BUFFER, s_pCurBufferPointer - s_pBaseBufferPointer, s_pBaseBufferPointer, GL_STREAM_DRAW);
GL_REPORT_ERRORD();
// setup the pointers
@ -244,8 +240,6 @@ void VertexManager::vFlush()
GFX_DEBUGGER_PAUSE_AT(NEXT_FLUSH, true);
//s_nCurVBOIndex = (s_nCurVBOIndex + 1) % ARRAYSIZE(s_vboBuffers);
s_pCurBufferPointer = LocalVBuffer;
IndexGenerator::Start(TIBuffer,LIBuffer,PIBuffer);
#if defined(_DEBUG) || defined(DEBUGFAST)
if (g_ActiveConfig.iLog & CONF_SAVESHADERS)