vertexloader_pos cleanup: pick the 2- or 3-element position reader once in CompileVertexTranslator instead of branching on PosElements for every vertex. Not much of a speedup, though.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@2256 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
hrydgard 2009-02-15 14:46:17 +00:00
parent 6deb87c176
commit c49f969563
4 changed files with 119 additions and 136 deletions

View File

@ -79,10 +79,26 @@ inline u16 DataRead()
return tmp; return tmp;
} }
// s16 specialization of DataRead: loads the 16-bit value currently pointed to
// by g_pVideoData, advances g_pVideoData by two bytes, and returns the value
// after passing it through Common::swap16 and reinterpreting it as signed.
template <>
inline s16 DataRead()
{
	const u16 raw = *(u16*)g_pVideoData;
	g_pVideoData += 2;
	return (s16)Common::swap16(raw);
}
template <> template <>
inline u32 DataRead() inline u32 DataRead()
{ {
u32 tmp = Common::swap32(*(u32*)g_pVideoData); u32 tmp = (u32)Common::swap32(*(u32*)g_pVideoData);
g_pVideoData += 4;
return tmp;
}
template <>
inline s32 DataRead()
{
s32 tmp = (s32)Common::swap32(*(u32*)g_pVideoData);
g_pVideoData += 4; g_pVideoData += 4;
return tmp; return tmp;
} }

View File

@ -211,22 +211,22 @@ void VertexLoader::CompileVertexTranslator()
case NOT_PRESENT: {_assert_msg_(0, "Vertex descriptor without position!", "WTF?");} break; case NOT_PRESENT: {_assert_msg_(0, "Vertex descriptor without position!", "WTF?");} break;
case DIRECT: case DIRECT:
switch (m_VtxAttr.PosFormat) { switch (m_VtxAttr.PosFormat) {
case FORMAT_UBYTE: m_VertexSize += m_VtxAttr.PosElements?3:2; WriteCall(Pos_ReadDirect_UByte); break; case FORMAT_UBYTE: m_VertexSize += m_VtxAttr.PosElements?3:2; WriteCall(m_VtxAttr.PosElements?Pos_ReadDirect_UByte3:Pos_ReadDirect_UByte2); break;
case FORMAT_BYTE: m_VertexSize += m_VtxAttr.PosElements?3:2; WriteCall(Pos_ReadDirect_Byte); break; case FORMAT_BYTE: m_VertexSize += m_VtxAttr.PosElements?3:2; WriteCall(m_VtxAttr.PosElements?Pos_ReadDirect_Byte3:Pos_ReadDirect_Byte2); break;
case FORMAT_USHORT: m_VertexSize += m_VtxAttr.PosElements?6:4; WriteCall(Pos_ReadDirect_UShort); break; case FORMAT_USHORT: m_VertexSize += m_VtxAttr.PosElements?6:4; WriteCall(m_VtxAttr.PosElements?Pos_ReadDirect_UShort3:Pos_ReadDirect_UShort2); break;
case FORMAT_SHORT: m_VertexSize += m_VtxAttr.PosElements?6:4; WriteCall(Pos_ReadDirect_Short); break; case FORMAT_SHORT: m_VertexSize += m_VtxAttr.PosElements?6:4; WriteCall(m_VtxAttr.PosElements?Pos_ReadDirect_Short3:Pos_ReadDirect_Short2); break;
case FORMAT_FLOAT: m_VertexSize += m_VtxAttr.PosElements?12:8; WriteCall(Pos_ReadDirect_Float); break; case FORMAT_FLOAT: m_VertexSize += m_VtxAttr.PosElements?12:8; WriteCall(m_VtxAttr.PosElements?Pos_ReadDirect_Float3:Pos_ReadDirect_Float2); break;
default: _assert_(0); break; default: _assert_(0); break;
} }
nat_offset += 12; nat_offset += 12;
break; break;
case INDEX8: case INDEX8:
switch (m_VtxAttr.PosFormat) { switch (m_VtxAttr.PosFormat) {
case FORMAT_UBYTE: WriteCall(Pos_ReadIndex8_UByte); break; //WTF? case FORMAT_UBYTE: WriteCall(m_VtxAttr.PosElements?Pos_ReadIndex8_UByte3:Pos_ReadIndex8_UByte2); break; //WTF?
case FORMAT_BYTE: WriteCall(Pos_ReadIndex8_Byte); break; case FORMAT_BYTE: WriteCall(m_VtxAttr.PosElements?Pos_ReadIndex8_Byte3:Pos_ReadIndex8_Byte2); break;
case FORMAT_USHORT: WriteCall(Pos_ReadIndex8_UShort); break; case FORMAT_USHORT: WriteCall(m_VtxAttr.PosElements?Pos_ReadIndex8_UShort3:Pos_ReadIndex8_UShort2); break;
case FORMAT_SHORT: WriteCall(Pos_ReadIndex8_Short); break; case FORMAT_SHORT: WriteCall(m_VtxAttr.PosElements?Pos_ReadIndex8_Short3:Pos_ReadIndex8_Short2); break;
case FORMAT_FLOAT: WriteCall(Pos_ReadIndex8_Float); break; case FORMAT_FLOAT: WriteCall(m_VtxAttr.PosElements?Pos_ReadIndex8_Float3:Pos_ReadIndex8_Float2); break;
default: _assert_(0); break; default: _assert_(0); break;
} }
m_VertexSize += 1; m_VertexSize += 1;
@ -234,11 +234,11 @@ void VertexLoader::CompileVertexTranslator()
break; break;
case INDEX16: case INDEX16:
switch (m_VtxAttr.PosFormat) { switch (m_VtxAttr.PosFormat) {
case FORMAT_UBYTE: WriteCall(Pos_ReadIndex16_UByte); break; case FORMAT_UBYTE: WriteCall(m_VtxAttr.PosElements?Pos_ReadIndex16_UByte3:Pos_ReadIndex16_UByte2); break;
case FORMAT_BYTE: WriteCall(Pos_ReadIndex16_Byte); break; case FORMAT_BYTE: WriteCall(m_VtxAttr.PosElements?Pos_ReadIndex16_Byte3:Pos_ReadIndex16_Byte2); break;
case FORMAT_USHORT: WriteCall(Pos_ReadIndex16_UShort); break; case FORMAT_USHORT: WriteCall(m_VtxAttr.PosElements?Pos_ReadIndex16_UShort3:Pos_ReadIndex16_UShort2); break;
case FORMAT_SHORT: WriteCall(Pos_ReadIndex16_Short); break; case FORMAT_SHORT: WriteCall(m_VtxAttr.PosElements?Pos_ReadIndex16_Short3:Pos_ReadIndex16_Short2); break;
case FORMAT_FLOAT: WriteCall(Pos_ReadIndex16_Float); break; case FORMAT_FLOAT: WriteCall(m_VtxAttr.PosElements?Pos_ReadIndex16_Float3:Pos_ReadIndex16_Float2); break;
default: _assert_(0); break; default: _assert_(0); break;
} }
m_VertexSize += 2; m_VertexSize += 2;

View File

@ -76,74 +76,58 @@ MOVUPS(MOffset(EDI, 0), XMM0);
// ============================================================================== // ==============================================================================
// Direct // Direct
// ============================================================================== // ==============================================================================
// Reads a position directly from the data stream as unsigned bytes and writes
// it as three floats at VertexManager::s_pCurBufferPointer.
// The first two components are always read with DataReadU8() and multiplied by
// posScale. The third is read the same way only when pVtxAttr->PosElements is
// non-zero; otherwise it is set to 1.0f. The output pointer is then advanced
// by 12 bytes.
void LOADERDECL Pos_ReadDirect_UByte()
{
	float* const out = (float*)VertexManager::s_pCurBufferPointer;
	out[0] = (float)DataReadU8() * posScale;
	out[1] = (float)DataReadU8() * posScale;
	out[2] = pVtxAttr->PosElements ? (float)DataReadU8() * posScale : 1.0f;
	LOG_VTX();
	VertexManager::s_pCurBufferPointer += 12;
}
void LOADERDECL Pos_ReadDirect_Byte() template <class T, bool three>
{ void Pos_ReadDirect()
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s8)DataReadU8() * posScale;
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s8)DataReadU8() * posScale;
if (pVtxAttr->PosElements)
((float*)VertexManager::s_pCurBufferPointer)[2] = (float)(s8)DataReadU8() * posScale;
else
((float*)VertexManager::s_pCurBufferPointer)[2] = 1.0;
LOG_VTX();
VertexManager::s_pCurBufferPointer += 12;
}
void LOADERDECL Pos_ReadDirect_UShort()
{ {
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)DataReadU16() * posScale; ((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(T)DataRead<T>() * posScale;
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)DataReadU16() * posScale; ((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(T)DataRead<T>() * posScale;
if (pVtxAttr->PosElements) if (three)
((float*)VertexManager::s_pCurBufferPointer)[2] = (float)DataReadU16() * posScale; ((float*)VertexManager::s_pCurBufferPointer)[2] = (float)(T)DataRead<T>() * posScale;
else else
((float*)VertexManager::s_pCurBufferPointer)[2] = 1.0f; ((float*)VertexManager::s_pCurBufferPointer)[2] = 1.0f;
LOG_VTX(); LOG_VTX();
VertexManager::s_pCurBufferPointer += 12; VertexManager::s_pCurBufferPointer += 12;
} }
void LOADERDECL Pos_ReadDirect_Short() void LOADERDECL Pos_ReadDirect_UByte3() { Pos_ReadDirect<u8, true>(); }
{ void LOADERDECL Pos_ReadDirect_Byte3() { Pos_ReadDirect<s8, true>(); }
((float*)VertexManager::s_pCurBufferPointer)[0] = (float)(s16)DataReadU16() * posScale; void LOADERDECL Pos_ReadDirect_UShort3() { Pos_ReadDirect<u16, true>(); }
((float*)VertexManager::s_pCurBufferPointer)[1] = (float)(s16)DataReadU16() * posScale; void LOADERDECL Pos_ReadDirect_Short3() { Pos_ReadDirect<s16, true>(); }
if (pVtxAttr->PosElements)
((float*)VertexManager::s_pCurBufferPointer)[2] = (float)(s16)DataReadU16() * posScale;
else
((float*)VertexManager::s_pCurBufferPointer)[2] = 1.0f;
LOG_VTX();
VertexManager::s_pCurBufferPointer += 12;
}
void LOADERDECL Pos_ReadDirect_Float() void LOADERDECL Pos_ReadDirect_UByte2() { Pos_ReadDirect<u8, false>(); }
void LOADERDECL Pos_ReadDirect_Byte2() { Pos_ReadDirect<s8, false>(); }
void LOADERDECL Pos_ReadDirect_UShort2() { Pos_ReadDirect<u16, false>(); }
void LOADERDECL Pos_ReadDirect_Short2() { Pos_ReadDirect<s16, false>(); }
void LOADERDECL Pos_ReadDirect_Float3()
{ {
// No need to use floating point here. // No need to use floating point here.
((u32 *)VertexManager::s_pCurBufferPointer)[0] = DataReadU32(); ((u32 *)VertexManager::s_pCurBufferPointer)[0] = DataReadU32();
((u32 *)VertexManager::s_pCurBufferPointer)[1] = DataReadU32(); ((u32 *)VertexManager::s_pCurBufferPointer)[1] = DataReadU32();
if (pVtxAttr->PosElements) ((u32 *)VertexManager::s_pCurBufferPointer)[2] = DataReadU32();
((u32 *)VertexManager::s_pCurBufferPointer)[2] = DataReadU32();
else
((float*)VertexManager::s_pCurBufferPointer)[2] = 1.0f;
LOG_VTX(); LOG_VTX();
VertexManager::s_pCurBufferPointer += 12; VertexManager::s_pCurBufferPointer += 12;
} }
template<class T> void LOADERDECL Pos_ReadDirect_Float2()
{
// No need to use floating point here.
((u32 *)VertexManager::s_pCurBufferPointer)[0] = DataReadU32();
((u32 *)VertexManager::s_pCurBufferPointer)[1] = DataReadU32();
((u32 *)VertexManager::s_pCurBufferPointer)[2] = 0x3f800000; // 1.0f
LOG_VTX();
VertexManager::s_pCurBufferPointer += 12;
}
template<class T, bool three>
inline void Pos_ReadIndex_Byte(int Index) inline void Pos_ReadIndex_Byte(int Index)
{ {
const u8* pData = cached_arraybases[ARRAY_POSITION] + ((u32)Index * arraystrides[ARRAY_POSITION]); const u8* pData = cached_arraybases[ARRAY_POSITION] + ((u32)Index * arraystrides[ARRAY_POSITION]);
((float*)VertexManager::s_pCurBufferPointer)[0] = ((float)(T)(pData[0])) * posScale; ((float*)VertexManager::s_pCurBufferPointer)[0] = ((float)(T)(pData[0])) * posScale;
((float*)VertexManager::s_pCurBufferPointer)[1] = ((float)(T)(pData[1])) * posScale; ((float*)VertexManager::s_pCurBufferPointer)[1] = ((float)(T)(pData[1])) * posScale;
if (pVtxAttr->PosElements) if (three)
((float*)VertexManager::s_pCurBufferPointer)[2] = ((float)(T)(pData[2])) * posScale; ((float*)VertexManager::s_pCurBufferPointer)[2] = ((float)(T)(pData[2])) * posScale;
else else
((float*)VertexManager::s_pCurBufferPointer)[2] = 1.0f; ((float*)VertexManager::s_pCurBufferPointer)[2] = 1.0f;
@ -151,13 +135,13 @@ inline void Pos_ReadIndex_Byte(int Index)
VertexManager::s_pCurBufferPointer += 12; VertexManager::s_pCurBufferPointer += 12;
} }
template<class T> template<class T, bool three>
inline void Pos_ReadIndex_Short(int Index) inline void Pos_ReadIndex_Short(int Index)
{ {
const u16* pData = (const u16 *)(cached_arraybases[ARRAY_POSITION] + ((u32)Index * arraystrides[ARRAY_POSITION])); const u16* pData = (const u16 *)(cached_arraybases[ARRAY_POSITION] + ((u32)Index * arraystrides[ARRAY_POSITION]));
((float*)VertexManager::s_pCurBufferPointer)[0] = ((float)(T)Common::swap16(pData[0])) * posScale; ((float*)VertexManager::s_pCurBufferPointer)[0] = ((float)(T)Common::swap16(pData[0])) * posScale;
((float*)VertexManager::s_pCurBufferPointer)[1] = ((float)(T)Common::swap16(pData[1])) * posScale; ((float*)VertexManager::s_pCurBufferPointer)[1] = ((float)(T)Common::swap16(pData[1])) * posScale;
if (pVtxAttr->PosElements) if (three)
((float*)VertexManager::s_pCurBufferPointer)[2] = ((float)(T)Common::swap16(pData[2])) * posScale; ((float*)VertexManager::s_pCurBufferPointer)[2] = ((float)(T)Common::swap16(pData[2])) * posScale;
else else
((float*)VertexManager::s_pCurBufferPointer)[2] = 1.0f; ((float*)VertexManager::s_pCurBufferPointer)[2] = 1.0f;
@ -165,12 +149,13 @@ inline void Pos_ReadIndex_Short(int Index)
VertexManager::s_pCurBufferPointer += 12; VertexManager::s_pCurBufferPointer += 12;
} }
template<bool three>
inline void Pos_ReadIndex_Float(int Index) inline void Pos_ReadIndex_Float(int Index)
{ {
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (Index * arraystrides[ARRAY_POSITION])); const u32* pData = (const u32 *)(cached_arraybases[ARRAY_POSITION] + (Index * arraystrides[ARRAY_POSITION]));
((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]); ((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]);
((u32*)VertexManager::s_pCurBufferPointer)[1] = Common::swap32(pData[1]); ((u32*)VertexManager::s_pCurBufferPointer)[1] = Common::swap32(pData[1]);
if (pVtxAttr->PosElements) if (three)
((u32*)VertexManager::s_pCurBufferPointer)[2] = Common::swap32(pData[2]); ((u32*)VertexManager::s_pCurBufferPointer)[2] = Common::swap32(pData[2]);
else else
((float*)VertexManager::s_pCurBufferPointer)[2] = 1.0f; ((float*)VertexManager::s_pCurBufferPointer)[2] = 1.0f;
@ -181,65 +166,29 @@ inline void Pos_ReadIndex_Float(int Index)
// ============================================================================== // ==============================================================================
// Index 8 // Index 8
// ============================================================================== // ==============================================================================
void LOADERDECL Pos_ReadIndex8_UByte() void LOADERDECL Pos_ReadIndex8_UByte3() {Pos_ReadIndex_Byte<u8, true> (DataReadU8());}
{ void LOADERDECL Pos_ReadIndex8_Byte3() {Pos_ReadIndex_Byte<s8, true> (DataReadU8());}
u8 Index = DataReadU8(); void LOADERDECL Pos_ReadIndex8_UShort3() {Pos_ReadIndex_Short<u16, true> (DataReadU8());}
Pos_ReadIndex_Byte<u8>(Index); void LOADERDECL Pos_ReadIndex8_Short3() {Pos_ReadIndex_Short<s16, true> (DataReadU8());}
} void LOADERDECL Pos_ReadIndex8_Float3() {Pos_ReadIndex_Float<true> (DataReadU8());}
void LOADERDECL Pos_ReadIndex8_UByte2() {Pos_ReadIndex_Byte<u8, false>(DataReadU8());}
void LOADERDECL Pos_ReadIndex8_Byte() void LOADERDECL Pos_ReadIndex8_Byte2() {Pos_ReadIndex_Byte<s8, false>(DataReadU8());}
{ void LOADERDECL Pos_ReadIndex8_UShort2() {Pos_ReadIndex_Short<u16, false>(DataReadU8());}
u8 Index = DataReadU8(); void LOADERDECL Pos_ReadIndex8_Short2() {Pos_ReadIndex_Short<s16, false>(DataReadU8());}
Pos_ReadIndex_Byte<s8>(Index); void LOADERDECL Pos_ReadIndex8_Float2() {Pos_ReadIndex_Float<false> (DataReadU8());}
}
// Reads an 8-bit index with DataReadU8() and hands it to
// Pos_ReadIndex_Short<u16>, which loads the indexed position.
void LOADERDECL Pos_ReadIndex8_UShort()
{
	const u8 posIndex = DataReadU8();
	Pos_ReadIndex_Short<u16>(posIndex);
}
// Reads an 8-bit index with DataReadU8() and hands it to
// Pos_ReadIndex_Short<s16>, which loads the indexed position.
void LOADERDECL Pos_ReadIndex8_Short()
{
	const u8 posIndex = DataReadU8();
	Pos_ReadIndex_Short<s16>(posIndex);
}
// Reads an 8-bit index with DataReadU8() and hands it to
// Pos_ReadIndex_Float, which loads the indexed position.
void LOADERDECL Pos_ReadIndex8_Float()
{
	const u8 posIndex = DataReadU8();
	Pos_ReadIndex_Float(posIndex);
}
// ============================================================================== // ==============================================================================
// Index 16 // Index 16
// ============================================================================== // ==============================================================================
void LOADERDECL Pos_ReadIndex16_UByte3() {Pos_ReadIndex_Byte<u8, true> (DataReadU16());}
void LOADERDECL Pos_ReadIndex16_UByte(){ void LOADERDECL Pos_ReadIndex16_Byte3() {Pos_ReadIndex_Byte<s8, true> (DataReadU16());}
u16 Index = DataReadU16(); void LOADERDECL Pos_ReadIndex16_UShort3() {Pos_ReadIndex_Short<u16, true> (DataReadU16());}
Pos_ReadIndex_Byte<u8>(Index); void LOADERDECL Pos_ReadIndex16_Short3() {Pos_ReadIndex_Short<s16, true> (DataReadU16());}
} void LOADERDECL Pos_ReadIndex16_Float3() {Pos_ReadIndex_Float<true> (DataReadU16());}
void LOADERDECL Pos_ReadIndex16_UByte2() {Pos_ReadIndex_Byte<u8, false>(DataReadU16());}
void LOADERDECL Pos_ReadIndex16_Byte(){ void LOADERDECL Pos_ReadIndex16_Byte2() {Pos_ReadIndex_Byte<s8, false>(DataReadU16());}
u16 Index = DataReadU16(); void LOADERDECL Pos_ReadIndex16_UShort2() {Pos_ReadIndex_Short<u16, false>(DataReadU16());}
Pos_ReadIndex_Byte<s8>(Index); void LOADERDECL Pos_ReadIndex16_Short2() {Pos_ReadIndex_Short<s16, false>(DataReadU16());}
} void LOADERDECL Pos_ReadIndex16_Float2() {Pos_ReadIndex_Float<false> (DataReadU16());}
// Reads a 16-bit index with DataReadU16() and hands it to
// Pos_ReadIndex_Short<u16>, which loads the indexed position.
void LOADERDECL Pos_ReadIndex16_UShort()
{
	const u16 posIndex = DataReadU16();
	Pos_ReadIndex_Short<u16>(posIndex);
}
// Reads a 16-bit index with DataReadU16() and hands it to
// Pos_ReadIndex_Short<s16>, which loads the indexed position.
void LOADERDECL Pos_ReadIndex16_Short()
{
	const u16 posIndex = DataReadU16();
	Pos_ReadIndex_Short<s16>(posIndex);
}
// Reads a 16-bit index with DataReadU16() and hands it to
// Pos_ReadIndex_Float, which loads the indexed position.
void LOADERDECL Pos_ReadIndex16_Float()
{
	const u16 posIndex = DataReadU16();
	Pos_ReadIndex_Float(posIndex);
}
#endif #endif

View File

@ -18,22 +18,40 @@
#ifndef VERTEXLOADER_POSITION_H #ifndef VERTEXLOADER_POSITION_H
#define VERTEXLOADER_POSITION_H #define VERTEXLOADER_POSITION_H
void LOADERDECL Pos_ReadDirect_UByte(); void LOADERDECL Pos_ReadDirect_UByte3();
void LOADERDECL Pos_ReadDirect_Byte(); void LOADERDECL Pos_ReadDirect_Byte3();
void LOADERDECL Pos_ReadDirect_UShort(); void LOADERDECL Pos_ReadDirect_UShort3();
void LOADERDECL Pos_ReadDirect_Short(); void LOADERDECL Pos_ReadDirect_Short3();
void LOADERDECL Pos_ReadDirect_Float(); void LOADERDECL Pos_ReadDirect_Float3();
void LOADERDECL Pos_ReadIndex8_UByte(); void LOADERDECL Pos_ReadIndex8_UByte3();
void LOADERDECL Pos_ReadIndex8_Byte(); void LOADERDECL Pos_ReadIndex8_Byte3();
void LOADERDECL Pos_ReadIndex8_UShort(); void LOADERDECL Pos_ReadIndex8_UShort3();
void LOADERDECL Pos_ReadIndex8_Short(); void LOADERDECL Pos_ReadIndex8_Short3();
void LOADERDECL Pos_ReadIndex8_Float(); void LOADERDECL Pos_ReadIndex8_Float3();
void LOADERDECL Pos_ReadIndex16_UByte(); void LOADERDECL Pos_ReadIndex16_UByte3();
void LOADERDECL Pos_ReadIndex16_Byte(); void LOADERDECL Pos_ReadIndex16_Byte3();
void LOADERDECL Pos_ReadIndex16_UShort(); void LOADERDECL Pos_ReadIndex16_UShort3();
void LOADERDECL Pos_ReadIndex16_Short(); void LOADERDECL Pos_ReadIndex16_Short3();
void LOADERDECL Pos_ReadIndex16_Float(); void LOADERDECL Pos_ReadIndex16_Float3();
// Two-element position readers. CompileVertexTranslator selects these when
// m_VtxAttr.PosElements is zero (and the "...3" variants otherwise).
// Each still writes three floats per vertex; the third is set to 1.0f instead
// of being read from the source data.

// Direct: components are read straight from the data stream.
void LOADERDECL Pos_ReadDirect_UByte2();
void LOADERDECL Pos_ReadDirect_Byte2();
void LOADERDECL Pos_ReadDirect_UShort2();
void LOADERDECL Pos_ReadDirect_Short2();
void LOADERDECL Pos_ReadDirect_Float2();
// Index8: an 8-bit index is read from the stream and used to look up the position.
void LOADERDECL Pos_ReadIndex8_UByte2();
void LOADERDECL Pos_ReadIndex8_Byte2();
void LOADERDECL Pos_ReadIndex8_UShort2();
void LOADERDECL Pos_ReadIndex8_Short2();
void LOADERDECL Pos_ReadIndex8_Float2();
// Index16: a 16-bit index is read from the stream and used to look up the position.
void LOADERDECL Pos_ReadIndex16_UByte2();
void LOADERDECL Pos_ReadIndex16_Byte2();
void LOADERDECL Pos_ReadIndex16_UShort2();
void LOADERDECL Pos_ReadIndex16_Short2();
void LOADERDECL Pos_ReadIndex16_Float2();
#endif #endif