Rework the normal loading; it should now be as accurate as real hardware.

More accurate means slower, so no complaints about the lost speed, please; I will optimize later.
Please test for regressions, but I hope you will get a nice surprise from this commit :).

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@6881 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
Rodolfo Osvaldo Bogado 2011-01-19 13:57:15 +00:00
parent 5aa34d0483
commit 0416a1b567
3 changed files with 98 additions and 361 deletions

View File

@ -311,55 +311,16 @@ void VertexLoader::CompileVertexTranslator()
vtx_decl.normal_offset[0] = -1; vtx_decl.normal_offset[0] = -1;
vtx_decl.normal_offset[1] = -1; vtx_decl.normal_offset[1] = -1;
vtx_decl.normal_offset[2] = -1; vtx_decl.normal_offset[2] = -1;
switch (vtx_attr.NormalFormat) { vtx_decl.normal_gl_type = VAR_FLOAT;
case FORMAT_UBYTE: vtx_decl.normal_gl_size = 3;
case FORMAT_BYTE: vtx_decl.normal_offset[0] = nat_offset;
{ nat_offset += 12;
vtx_decl.normal_gl_type = VAR_BYTE; if (vtx_attr.NormalElements) {
int native_size = 4; vtx_decl.normal_offset[1] = nat_offset;
if (!g_Config.backend_info.bAllowSignedBytes)
{
vtx_decl.normal_gl_type = VAR_SHORT;
native_size = 8;
}
vtx_decl.normal_gl_size = 4;
vtx_decl.normal_offset[0] = nat_offset;
nat_offset += native_size;
if (vtx_attr.NormalElements) {
vtx_decl.normal_offset[1] = nat_offset;
nat_offset += native_size;
vtx_decl.normal_offset[2] = nat_offset;
nat_offset += native_size;
}
break;
}
case FORMAT_USHORT:
case FORMAT_SHORT:
vtx_decl.normal_gl_type = VAR_SHORT;
vtx_decl.normal_gl_size = 4;
vtx_decl.normal_offset[0] = nat_offset;
nat_offset += 8;
if (vtx_attr.NormalElements) {
vtx_decl.normal_offset[1] = nat_offset;
nat_offset += 8;
vtx_decl.normal_offset[2] = nat_offset;
nat_offset += 8;
}
break;
case FORMAT_FLOAT:
vtx_decl.normal_gl_type = VAR_FLOAT;
vtx_decl.normal_gl_size = 3;
vtx_decl.normal_offset[0] = nat_offset;
nat_offset += 12; nat_offset += 12;
if (vtx_attr.NormalElements) { vtx_decl.normal_offset[2] = nat_offset;
vtx_decl.normal_offset[1] = nat_offset; nat_offset += 12;
nat_offset += 12; }
vtx_decl.normal_offset[2] = nat_offset;
nat_offset += 12;
}
break;
default: _assert_(0); break;
}
int numNormals = (m_VtxAttr.NormalElements == 1) ? NRM_THREE : NRM_ONE; int numNormals = (m_VtxAttr.NormalElements == 1) ? NRM_THREE : NRM_ONE;
m_NativeFmt->m_components |= VB_HAS_NRM0; m_NativeFmt->m_components |= VB_HAS_NRM0;

View File

@ -26,12 +26,11 @@
#include <tmmintrin.h> #include <tmmintrin.h>
#endif #endif
#define LOG_NORM8() // PRIM_LOG("norm: %f %f %f, ", ((s8*)VertexManager::s_pCurBufferPointer)[-3]/127.0f, ((s8*)VertexManager::s_pCurBufferPointer)[-2]/127.0f, ((s8*)VertexManager::s_pCurBufferPointer)[-1]/127.0f); #define LOG_NORM8() // PRIM_LOG("norm: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-3], ((float*)VertexManager::s_pCurBufferPointer)[-2], ((float*)VertexManager::s_pCurBufferPointer)[-1]);
#define LOG_NORM16() // PRIM_LOG("norm: %f %f %f, ", ((s16*)VertexManager::s_pCurBufferPointer)[-3]/32767.0f, ((s16*)VertexManager::s_pCurBufferPointer)[-2]/32767.0f, ((s16*)VertexManager::s_pCurBufferPointer)[-1]/32767.0f); #define LOG_NORM16() // PRIM_LOG("norm: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-3], ((float*)VertexManager::s_pCurBufferPointer)[-2], ((float*)VertexManager::s_pCurBufferPointer)[-1]);
#define LOG_NORMF() // PRIM_LOG("norm: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-3], ((float*)VertexManager::s_pCurBufferPointer)[-2], ((float*)VertexManager::s_pCurBufferPointer)[-1]); #define LOG_NORMF() // PRIM_LOG("norm: %f %f %f, ", ((float*)VertexManager::s_pCurBufferPointer)[-3], ((float*)VertexManager::s_pCurBufferPointer)[-2], ((float*)VertexManager::s_pCurBufferPointer)[-1]);
VertexLoader_Normal::Set VertexLoader_Normal::m_Table[NUM_NRM_TYPE][NUM_NRM_INDICES][NUM_NRM_ELEMENTS][NUM_NRM_FORMAT]; VertexLoader_Normal::Set VertexLoader_Normal::m_Table[NUM_NRM_TYPE][NUM_NRM_INDICES][NUM_NRM_ELEMENTS][NUM_NRM_FORMAT];
VertexLoader_Normal::Set VertexLoader_Normal::m_TableExpand16[NUM_NRM_TYPE][NUM_NRM_INDICES][NUM_NRM_ELEMENTS][NUM_NRM_FORMAT];
void VertexLoader_Normal::Init(void) void VertexLoader_Normal::Init(void)
{ {
@ -101,32 +100,6 @@ void VertexLoader_Normal::Init(void)
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Set(6, Normal_Index16_Short3_Indices3); //HACK m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_USHORT] = Set(6, Normal_Index16_Short3_Indices3); //HACK
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Set(6, Normal_Index16_Short3_Indices3); m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_SHORT] = Set(6, Normal_Index16_Short3_Indices3);
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Set(6, Normal_Index16_Float3_Indices3); m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_FLOAT] = Set(6, Normal_Index16_Float3_Indices3);
// Work around D3D's lack of signed bytes
m_TableExpand16[NRM_DIRECT] [NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Set(3, Normal_DirectByte_Expand16);
m_TableExpand16[NRM_DIRECT] [NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Set(9, Normal_DirectByte3_Expand16);
m_TableExpand16[NRM_DIRECT] [NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Set(3, Normal_DirectByte_Expand16);
m_TableExpand16[NRM_DIRECT] [NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Set(9, Normal_DirectByte3_Expand16);
m_TableExpand16[NRM_INDEX8] [NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Set(1, Normal_Index8_Byte_Expand16);
m_TableExpand16[NRM_INDEX8] [NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Set(1, Normal_Index8_Byte3_Indices1_Expand16);
m_TableExpand16[NRM_INDEX8] [NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Set(1, Normal_Index8_Byte_Expand16);
m_TableExpand16[NRM_INDEX8] [NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Set(3, Normal_Index8_Byte3_Indices3_Expand16);
m_TableExpand16[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_BYTE] = Set(2, Normal_Index16_Byte_Expand16);
m_TableExpand16[NRM_INDEX16][NRM_INDICES1][NRM_NBT3][FORMAT_BYTE] = Set(2, Normal_Index16_Byte3_Indices1_Expand16);
m_TableExpand16[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_BYTE] = Set(2, Normal_Index16_Byte_Expand16);
m_TableExpand16[NRM_INDEX16][NRM_INDICES3][NRM_NBT3][FORMAT_BYTE] = Set(6, Normal_Index16_Byte3_Indices3_Expand16);
#if _M_SSE >= 0x301
if (cpu_info.bSSSE3) {
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_USHORT] = Set(2, Normal_Index16_Short_SSSE3); //HACK
m_Table[NRM_INDEX16][NRM_INDICES1][NRM_NBT] [FORMAT_SHORT] = Set(2, Normal_Index16_Short_SSSE3);
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_USHORT] = Set(2, Normal_Index16_Short_SSSE3); //HACK
m_Table[NRM_INDEX16][NRM_INDICES3][NRM_NBT] [FORMAT_SHORT] = Set(2, Normal_Index16_Short_SSSE3);
}
#endif
} }
unsigned int VertexLoader_Normal::GetSize(unsigned int _type, unsigned int _format, unsigned int _elements, unsigned int _index3) unsigned int VertexLoader_Normal::GetSize(unsigned int _type, unsigned int _format, unsigned int _elements, unsigned int _index3)
@ -136,85 +109,90 @@ unsigned int VertexLoader_Normal::GetSize(unsigned int _type, unsigned int _form
TPipelineFunction VertexLoader_Normal::GetFunction(unsigned int _type, unsigned int _format, unsigned int _elements, unsigned int _index3, bool allow_signed_bytes) TPipelineFunction VertexLoader_Normal::GetFunction(unsigned int _type, unsigned int _format, unsigned int _elements, unsigned int _index3, bool allow_signed_bytes)
{ {
if (!allow_signed_bytes) TPipelineFunction pFunc = m_Table[_type][_index3][_elements][_format].function;
{
TPipelineFunction pFunc = m_TableExpand16[_type][_index3][_elements][_format].function;
if (pFunc) return pFunc;
}
TPipelineFunction pFunc = m_Table[_type][_index3][_elements][_format].function;
return pFunc; return pFunc;
} }
// --- Expand s8 to s16 // This fracs are fixed acording to format
#define S8FRAC 0.015625f; // 1.0f / (1U << 6)
inline s16 Expands8Tos16(s8 input) #define S16FRAC 0.00006103515625f; // 1.0f / (1U << 14)
{
return (s16)((((s32)input) * 32767) / 127);
}
// --- Direct --- // --- Direct ---
void LOADERDECL VertexLoader_Normal::Normal_DirectByte()
inline void ReadDirectS8()
{ {
*VertexManager::s_pCurBufferPointer++ = DataReadU8(); ((float*)VertexManager::s_pCurBufferPointer)[0] = DataReadS8() * S8FRAC;
*VertexManager::s_pCurBufferPointer++ = DataReadU8(); ((float*)VertexManager::s_pCurBufferPointer)[1] = DataReadS8() * S8FRAC;
*VertexManager::s_pCurBufferPointer++ = DataReadU8(); ((float*)VertexManager::s_pCurBufferPointer)[2] = DataReadS8() * S8FRAC;
*VertexManager::s_pCurBufferPointer++ = 0; VertexManager::s_pCurBufferPointer += 12;
LOG_NORM8(); LOG_NORM8();
} }
void LOADERDECL VertexLoader_Normal::Normal_DirectByte_Expand16() inline void ReadDirectS16()
{ {
((s16*)VertexManager::s_pCurBufferPointer)[0] = Expands8Tos16(DataReadS8()); ((float*)VertexManager::s_pCurBufferPointer)[0] = ((s16)DataReadU16()) * S16FRAC;
((s16*)VertexManager::s_pCurBufferPointer)[1] = Expands8Tos16(DataReadS8()); ((float*)VertexManager::s_pCurBufferPointer)[0] = ((s16)DataReadU16()) * S16FRAC;
((s16*)VertexManager::s_pCurBufferPointer)[2] = Expands8Tos16(DataReadS8()); ((float*)VertexManager::s_pCurBufferPointer)[0] = ((s16)DataReadU16()) * S16FRAC;
((s16*)VertexManager::s_pCurBufferPointer)[3] = 0; VertexManager::s_pCurBufferPointer += 12;
VertexManager::s_pCurBufferPointer += 8;
LOG_NORM16();
}
void LOADERDECL VertexLoader_Normal::Normal_DirectShort()
{
((u16*)VertexManager::s_pCurBufferPointer)[0] = DataReadU16();
((u16*)VertexManager::s_pCurBufferPointer)[1] = DataReadU16();
((u16*)VertexManager::s_pCurBufferPointer)[2] = DataReadU16();
((u16*)VertexManager::s_pCurBufferPointer)[3] = 0;
VertexManager::s_pCurBufferPointer += 8;
LOG_NORM16() LOG_NORM16()
} }
void LOADERDECL VertexLoader_Normal::Normal_DirectFloat() inline void ReadDirectFloat()
{ {
((u32*)VertexManager::s_pCurBufferPointer)[0] = DataReadU32(); ((u32*)VertexManager::s_pCurBufferPointer)[0] = DataReadU32();
((u32*)VertexManager::s_pCurBufferPointer)[1] = DataReadU32(); ((u32*)VertexManager::s_pCurBufferPointer)[1] = DataReadU32();
((u32*)VertexManager::s_pCurBufferPointer)[2] = DataReadU32(); ((u32*)VertexManager::s_pCurBufferPointer)[2] = DataReadU32();
VertexManager::s_pCurBufferPointer += 12; VertexManager::s_pCurBufferPointer += 12;
LOG_NORMF() LOG_NORMF()
} }
inline void ReadIndirectS8(const s8* pData)
{
((float*)VertexManager::s_pCurBufferPointer)[0] = pData[0] * S8FRAC;
((float*)VertexManager::s_pCurBufferPointer)[1] = pData[1] * S8FRAC;
((float*)VertexManager::s_pCurBufferPointer)[2] = pData[2] * S8FRAC;
VertexManager::s_pCurBufferPointer += 12;
LOG_NORM8();
}
inline void ReadIndirectS16(const u16* pData)
{
((float*)VertexManager::s_pCurBufferPointer)[0] = ((s16)Common::swap16(pData[0])) * S16FRAC;
((float*)VertexManager::s_pCurBufferPointer)[1] = ((s16)Common::swap16(pData[1])) * S16FRAC;
((float*)VertexManager::s_pCurBufferPointer)[2] = ((s16)Common::swap16(pData[2])) * S16FRAC;
VertexManager::s_pCurBufferPointer += 12;
LOG_NORM16()
}
inline void ReadIndirectFloat(const u32* pData)
{
((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]);
((u32*)VertexManager::s_pCurBufferPointer)[1] = Common::swap32(pData[1]);
((u32*)VertexManager::s_pCurBufferPointer)[2] = Common::swap32(pData[2]);
VertexManager::s_pCurBufferPointer += 12;
LOG_NORMF();
}
void LOADERDECL VertexLoader_Normal::Normal_DirectByte()
{
ReadDirectS8();
}
void LOADERDECL VertexLoader_Normal::Normal_DirectShort()
{
ReadDirectS16();
}
void LOADERDECL VertexLoader_Normal::Normal_DirectFloat()
{
ReadDirectFloat();
}
void LOADERDECL VertexLoader_Normal::Normal_DirectByte3() void LOADERDECL VertexLoader_Normal::Normal_DirectByte3()
{ {
for (int i = 0; i < 3; i++) for (int i = 0; i < 3; i++)
{ {
*VertexManager::s_pCurBufferPointer++ = DataReadU8(); ReadDirectS8();
*VertexManager::s_pCurBufferPointer++ = DataReadU8();
*VertexManager::s_pCurBufferPointer++ = DataReadU8();
*VertexManager::s_pCurBufferPointer++ = 0;
LOG_NORM8();
}
}
void LOADERDECL VertexLoader_Normal::Normal_DirectByte3_Expand16()
{
for (int i = 0; i < 3; i++)
{
((u16*)VertexManager::s_pCurBufferPointer)[0] = Expands8Tos16(DataReadS8());
((u16*)VertexManager::s_pCurBufferPointer)[1] = Expands8Tos16(DataReadS8());
((u16*)VertexManager::s_pCurBufferPointer)[2] = Expands8Tos16(DataReadS8());
((u16*)VertexManager::s_pCurBufferPointer)[3] = 0;
VertexManager::s_pCurBufferPointer += 8;
LOG_NORM16();
} }
} }
@ -222,12 +200,7 @@ void LOADERDECL VertexLoader_Normal::Normal_DirectShort3()
{ {
for (int i = 0; i < 3; i++) for (int i = 0; i < 3; i++)
{ {
((u16*)VertexManager::s_pCurBufferPointer)[0] = DataReadU16(); ReadDirectS16();
((u16*)VertexManager::s_pCurBufferPointer)[1] = DataReadU16();
((u16*)VertexManager::s_pCurBufferPointer)[2] = DataReadU16();
((u16*)VertexManager::s_pCurBufferPointer)[3] = 0;
VertexManager::s_pCurBufferPointer += 8;
LOG_NORM16();
} }
} }
@ -235,11 +208,7 @@ void LOADERDECL VertexLoader_Normal::Normal_DirectFloat3()
{ {
for (int i = 0; i < 3; i++) for (int i = 0; i < 3; i++)
{ {
((u32*)VertexManager::s_pCurBufferPointer)[0] = DataReadU32(); ReadDirectFloat();
((u32*)VertexManager::s_pCurBufferPointer)[1] = DataReadU32();
((u32*)VertexManager::s_pCurBufferPointer)[2] = DataReadU32();
VertexManager::s_pCurBufferPointer += 12;
LOG_NORMF();
} }
} }
@ -247,106 +216,53 @@ void LOADERDECL VertexLoader_Normal::Normal_DirectFloat3()
// --- Index8 --- // --- Index8 ---
void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte() void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte()
{
u8 Index = DataReadU8();
const u8* pData = cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]);
*VertexManager::s_pCurBufferPointer++ = pData[0];
*VertexManager::s_pCurBufferPointer++ = pData[1];
*VertexManager::s_pCurBufferPointer++ = pData[2];
*VertexManager::s_pCurBufferPointer++ = 0;
LOG_NORM8();
}
void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte_Expand16()
{ {
u8 Index = DataReadU8(); u8 Index = DataReadU8();
const s8* pData = (const s8 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL])); const s8* pData = (const s8 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]));
((s16*)VertexManager::s_pCurBufferPointer)[0] = Expands8Tos16(pData[0]); ReadIndirectS8(pData);
((s16*)VertexManager::s_pCurBufferPointer)[1] = Expands8Tos16(pData[1]);
((s16*)VertexManager::s_pCurBufferPointer)[2] = Expands8Tos16(pData[2]);
((s16*)VertexManager::s_pCurBufferPointer)[3] = 0;
VertexManager::s_pCurBufferPointer += 8;
LOG_NORM16();
} }
void LOADERDECL VertexLoader_Normal::Normal_Index8_Short() void LOADERDECL VertexLoader_Normal::Normal_Index8_Short()
{ {
u8 Index = DataReadU8(); u8 Index = DataReadU8();
const u16* pData = (const u16 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL])); const u16* pData = (const u16 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]));
((u16*)VertexManager::s_pCurBufferPointer)[0] = Common::swap16(pData[0]); ReadIndirectS16(pData);
((u16*)VertexManager::s_pCurBufferPointer)[1] = Common::swap16(pData[1]);
((u16*)VertexManager::s_pCurBufferPointer)[2] = Common::swap16(pData[2]);
((u16*)VertexManager::s_pCurBufferPointer)[3] = 0;
VertexManager::s_pCurBufferPointer += 8;
LOG_NORM16();
} }
void LOADERDECL VertexLoader_Normal::Normal_Index8_Float() void LOADERDECL VertexLoader_Normal::Normal_Index8_Float()
{ {
u8 Index = DataReadU8(); u8 Index = DataReadU8();
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL])); const u32* pData = (const u32 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]));
((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]); ReadIndirectFloat(pData);
((u32*)VertexManager::s_pCurBufferPointer)[1] = Common::swap32(pData[1]);
((u32*)VertexManager::s_pCurBufferPointer)[2] = Common::swap32(pData[2]);
VertexManager::s_pCurBufferPointer += 12;
LOG_NORMF();
} }
void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte3_Indices1() void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte3_Indices1()
{ {
u8 Index = DataReadU8(); u8 Index = DataReadU8();
const u8* pData = cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]); const s8* pData = (const s8 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]));
for (int i = 0; i < 3; i++) for (int i = 0; i < 3; i++)
{ {
*VertexManager::s_pCurBufferPointer++ = pData[3 * i]; ReadIndirectS8((const s8*)(&pData[3 * i]));
*VertexManager::s_pCurBufferPointer++ = pData[3 * i + 1];
*VertexManager::s_pCurBufferPointer++ = pData[3 * i + 2];
*VertexManager::s_pCurBufferPointer++ = 0;
LOG_NORM8();
} }
} }
void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte3_Indices1_Expand16()
{
u8 Index = DataReadU8();
const s8* pData = (const s8*)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]));
for (int i = 0; i < 3; i++)
{
((s16*)VertexManager::s_pCurBufferPointer)[0] = Expands8Tos16(pData[3 * i]);
((s16*)VertexManager::s_pCurBufferPointer)[1] = Expands8Tos16(pData[3 * i + 1]);
((s16*)VertexManager::s_pCurBufferPointer)[2] = Expands8Tos16(pData[3 * i + 2]);
((s16*)VertexManager::s_pCurBufferPointer)[3] = 0;
VertexManager::s_pCurBufferPointer += 8;
LOG_NORM16();
}
}
void LOADERDECL VertexLoader_Normal::Normal_Index8_Short3_Indices1() void LOADERDECL VertexLoader_Normal::Normal_Index8_Short3_Indices1()
{ {
u8 Index = DataReadU8(); u8 Index = DataReadU8();
const u16* pData = (const u16 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL])); const u16* pData = (const u16 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]));
for (int i = 0; i < 3; i++) for (int i = 0; i < 3; i++)
{ {
((u16*)VertexManager::s_pCurBufferPointer)[0] = Common::swap16(pData[3 * i]); ReadIndirectS16((const u16*)(&pData[3 * i]));
((u16*)VertexManager::s_pCurBufferPointer)[1] = Common::swap16(pData[3 * i + 1]);
((u16*)VertexManager::s_pCurBufferPointer)[2] = Common::swap16(pData[3 * i + 2]);
((u16*)VertexManager::s_pCurBufferPointer)[3] = 0;
VertexManager::s_pCurBufferPointer += 8;
LOG_NORM16();
} }
} }
void LOADERDECL VertexLoader_Normal::Normal_Index8_Float3_Indices1() void LOADERDECL VertexLoader_Normal::Normal_Index8_Float3_Indices1()
{ {
u8 Index = DataReadU8(); u8 Index = DataReadU8();
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL])); const u32* pData = (const u32 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]));
for (int i = 0; i < 3; i++) for (int i = 0; i < 3; i++)
{ {
((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[3 * i]); ReadIndirectFloat((const u32*)(&pData[3 * i]));
((u32*)VertexManager::s_pCurBufferPointer)[1] = Common::swap32(pData[3 * i + 1]);
((u32*)VertexManager::s_pCurBufferPointer)[2] = Common::swap32(pData[3 * i + 2]);
VertexManager::s_pCurBufferPointer += 12;
LOG_NORMF();
} }
} }
@ -355,29 +271,11 @@ void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte3_Indices3()
for (int i = 0; i < 3; i++) for (int i = 0; i < 3; i++)
{ {
u8 Index = DataReadU8(); u8 Index = DataReadU8();
const u8* pData = cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 1*3*i; const s8* pData = (const s8*)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 1*3*i);
*VertexManager::s_pCurBufferPointer++ = pData[0]; ReadIndirectS8(pData);
*VertexManager::s_pCurBufferPointer++ = pData[1];
*VertexManager::s_pCurBufferPointer++ = pData[2];
*VertexManager::s_pCurBufferPointer++ = 0;
LOG_NORM8();
} }
} }
void LOADERDECL VertexLoader_Normal::Normal_Index8_Byte3_Indices3_Expand16()
{
for (int i = 0; i < 3; i++)
{
u8 Index = DataReadU8();
const s8* pData = (const s8 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 2*3*i);
((s16*)VertexManager::s_pCurBufferPointer)[0] = Expands8Tos16(pData[0]);
((s16*)VertexManager::s_pCurBufferPointer)[1] = Expands8Tos16(pData[1]);
((s16*)VertexManager::s_pCurBufferPointer)[2] = Expands8Tos16(pData[2]);
((s16*)VertexManager::s_pCurBufferPointer)[3] = 0;
VertexManager::s_pCurBufferPointer += 8;
LOG_NORM16();
}
}
void LOADERDECL VertexLoader_Normal::Normal_Index8_Short3_Indices3() void LOADERDECL VertexLoader_Normal::Normal_Index8_Short3_Indices3()
{ {
@ -385,12 +283,7 @@ void LOADERDECL VertexLoader_Normal::Normal_Index8_Short3_Indices3()
{ {
u8 Index = DataReadU8(); u8 Index = DataReadU8();
const u16* pData = (const u16 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 2*3*i); const u16* pData = (const u16 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 2*3*i);
((u16*)VertexManager::s_pCurBufferPointer)[0] = Common::swap16(pData[0]); ReadIndirectS16(pData);
((u16*)VertexManager::s_pCurBufferPointer)[1] = Common::swap16(pData[1]);
((u16*)VertexManager::s_pCurBufferPointer)[2] = Common::swap16(pData[2]);
((u16*)VertexManager::s_pCurBufferPointer)[3] = 0;
VertexManager::s_pCurBufferPointer += 8;
LOG_NORM16();
} }
} }
@ -400,11 +293,7 @@ void LOADERDECL VertexLoader_Normal::Normal_Index8_Float3_Indices3()
{ {
u8 Index = DataReadU8(); u8 Index = DataReadU8();
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 4*3*i); const u32* pData = (const u32 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 4*3*i);
((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]); ReadIndirectFloat(pData);
((u32*)VertexManager::s_pCurBufferPointer)[1] = Common::swap32(pData[1]);
((u32*)VertexManager::s_pCurBufferPointer)[2] = Common::swap32(pData[2]);
VertexManager::s_pCurBufferPointer += 12;
LOG_NORMF();
} }
} }
@ -413,93 +302,33 @@ void LOADERDECL VertexLoader_Normal::Normal_Index8_Float3_Indices3()
void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte() void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte()
{
u16 Index = DataReadU16();
const u8* pData = cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]);
*VertexManager::s_pCurBufferPointer++ = pData[0];
*VertexManager::s_pCurBufferPointer++ = pData[1];
*VertexManager::s_pCurBufferPointer++ = pData[2];
*VertexManager::s_pCurBufferPointer++ = 0;
LOG_NORM8();
}
void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte_Expand16()
{ {
u16 Index = DataReadU16(); u16 Index = DataReadU16();
const s8* pData = (const s8 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL])); const s8* pData = (const s8 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]));
((s16*)VertexManager::s_pCurBufferPointer)[0] = Expands8Tos16(pData[0]); ReadIndirectS8(pData);
((s16*)VertexManager::s_pCurBufferPointer)[1] = Expands8Tos16(pData[1]);
((s16*)VertexManager::s_pCurBufferPointer)[2] = Expands8Tos16(pData[2]);
((s16*)VertexManager::s_pCurBufferPointer)[3] = 0;
VertexManager::s_pCurBufferPointer += 8;
LOG_NORM16();
} }
void LOADERDECL VertexLoader_Normal::Normal_Index16_Short() void LOADERDECL VertexLoader_Normal::Normal_Index16_Short()
{ {
u16 Index = DataReadU16(); u16 Index = DataReadU16();
const u16* pData = (const u16 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL])); const u16* pData = (const u16 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]));
((u16*)VertexManager::s_pCurBufferPointer)[0] = Common::swap16(pData[0]); ReadIndirectS16(pData);
((u16*)VertexManager::s_pCurBufferPointer)[1] = Common::swap16(pData[1]);
((u16*)VertexManager::s_pCurBufferPointer)[2] = Common::swap16(pData[2]);
((u16*)VertexManager::s_pCurBufferPointer)[3] = 0;
VertexManager::s_pCurBufferPointer += 8;
LOG_NORM16();
} }
#if _M_SSE >= 0x301
static const __m128i kMaskSwap16_3 = _mm_set_epi32(0xFFFFFFFFL, 0xFFFFFFFFL, 0x0FFFF0405L, 0x02030001L);
void LOADERDECL VertexLoader_Normal::Normal_Index16_Short_SSSE3()
{
u16 Index = DataReadU16();
const u16* pData = (const u16 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]));
__m128i a = _mm_loadl_epi64((__m128i*)pData);
__m128i b = _mm_shuffle_epi8(a, kMaskSwap16_3);
_mm_storel_epi64((__m128i*)VertexManager::s_pCurBufferPointer, b);
VertexManager::s_pCurBufferPointer += 8;
LOG_NORM16();
}
#endif
void LOADERDECL VertexLoader_Normal::Normal_Index16_Float() void LOADERDECL VertexLoader_Normal::Normal_Index16_Float()
{ {
u16 Index = DataReadU16(); u16 Index = DataReadU16();
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL])); const u32* pData = (const u32 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]));
((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]); ReadIndirectFloat(pData);
((u32*)VertexManager::s_pCurBufferPointer)[1] = Common::swap32(pData[1]);
((u32*)VertexManager::s_pCurBufferPointer)[2] = Common::swap32(pData[2]);
VertexManager::s_pCurBufferPointer += 12;
LOG_NORMF();
} }
void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte3_Indices1() void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte3_Indices1()
{ {
u16 Index = DataReadU16(); u16 Index = DataReadU16();
const u8* pData = cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]); const s8* pData = (const s8 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]));
for (int i = 0; i < 3; i++) for (int i = 0; i < 3; i++)
{ {
*VertexManager::s_pCurBufferPointer++ = pData[3 * i]; ReadIndirectS8((const s8 *)(&pData[3 * i]));
*VertexManager::s_pCurBufferPointer++ = pData[3 * i + 1];
*VertexManager::s_pCurBufferPointer++ = pData[3 * i + 2];
*VertexManager::s_pCurBufferPointer++ = 0;
LOG_NORM8();
}
}
void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte3_Indices1_Expand16()
{
u16 Index = DataReadU16();
const s8* pData = (const s8 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]));
for (int i = 0; i < 3; i++)
{
((s16*)VertexManager::s_pCurBufferPointer)[0] = Expands8Tos16(pData[3 * i]);
((s16*)VertexManager::s_pCurBufferPointer)[1] = Expands8Tos16(pData[3 * i + 1]);
((s16*)VertexManager::s_pCurBufferPointer)[2] = Expands8Tos16(pData[3 * i + 2]);
((s16*)VertexManager::s_pCurBufferPointer)[3] = 0;
VertexManager::s_pCurBufferPointer += 8;
LOG_NORM16();
} }
} }
@ -510,12 +339,7 @@ void LOADERDECL VertexLoader_Normal::Normal_Index16_Short3_Indices1()
for (int i = 0; i < 3; i++) for (int i = 0; i < 3; i++)
{ {
((u16*)VertexManager::s_pCurBufferPointer)[0] = Common::swap16(pData[3 * i]); ReadIndirectS16((const u16 *)(&pData[3 * i]));
((u16*)VertexManager::s_pCurBufferPointer)[1] = Common::swap16(pData[3 * i + 1]);
((u16*)VertexManager::s_pCurBufferPointer)[2] = Common::swap16(pData[3 * i + 2]);
((u16*)VertexManager::s_pCurBufferPointer)[3] = 0;
VertexManager::s_pCurBufferPointer += 8;
LOG_NORM16();
} }
} }
@ -526,11 +350,7 @@ void LOADERDECL VertexLoader_Normal::Normal_Index16_Float3_Indices1()
for (int i = 0; i < 3; i++) for (int i = 0; i < 3; i++)
{ {
((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[3 * i]); ReadIndirectFloat((const u32 *)(&pData[3 * i]));
((u32*)VertexManager::s_pCurBufferPointer)[1] = Common::swap32(pData[3 * i + 1]);
((u32*)VertexManager::s_pCurBufferPointer)[2] = Common::swap32(pData[3 * i + 2]);
VertexManager::s_pCurBufferPointer += 12;
LOG_NORMF();
} }
} }
@ -539,42 +359,18 @@ void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte3_Indices3()
for (int i = 0; i < 3; i++) for (int i = 0; i < 3; i++)
{ {
u16 Index = DataReadU16(); u16 Index = DataReadU16();
const u8* pData = cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 1*3*i; const s8* pData = (const s8*)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 1*3*i);
*VertexManager::s_pCurBufferPointer++ = pData[0]; ReadIndirectS8(pData);
*VertexManager::s_pCurBufferPointer++ = pData[1];
*VertexManager::s_pCurBufferPointer++ = pData[2];
*VertexManager::s_pCurBufferPointer++ = 0;
LOG_NORM8();
} }
} }
void LOADERDECL VertexLoader_Normal::Normal_Index16_Byte3_Indices3_Expand16()
{
for (int i = 0; i < 3; i++)
{
u16 Index = DataReadU16();
const s8* pData = (const s8 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 2*3*i);
((s16*)VertexManager::s_pCurBufferPointer)[0] = Expands8Tos16(pData[0]);
((s16*)VertexManager::s_pCurBufferPointer)[1] = Expands8Tos16(pData[1]);
((s16*)VertexManager::s_pCurBufferPointer)[2] = Expands8Tos16(pData[2]);
((s16*)VertexManager::s_pCurBufferPointer)[3] = 0;
VertexManager::s_pCurBufferPointer += 8;
LOG_NORM16();
}
}
void LOADERDECL VertexLoader_Normal::Normal_Index16_Short3_Indices3() void LOADERDECL VertexLoader_Normal::Normal_Index16_Short3_Indices3()
{ {
for (int i = 0; i < 3; i++) for (int i = 0; i < 3; i++)
{ {
u16 Index = DataReadU16(); u16 Index = DataReadU16();
const u16* pData = (const u16 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 2*3*i); const u16* pData = (const u16 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 2*3*i);
((u16*)VertexManager::s_pCurBufferPointer)[0] = Common::swap16(pData[0]); ReadIndirectS16(pData);
((u16*)VertexManager::s_pCurBufferPointer)[1] = Common::swap16(pData[1]);
((u16*)VertexManager::s_pCurBufferPointer)[2] = Common::swap16(pData[2]);
((u16*)VertexManager::s_pCurBufferPointer)[3] = 0;
VertexManager::s_pCurBufferPointer += 8;
LOG_NORM16();
} }
} }
@ -584,10 +380,6 @@ void LOADERDECL VertexLoader_Normal::Normal_Index16_Float3_Indices3()
{ {
u16 Index = DataReadU16(); u16 Index = DataReadU16();
const u32* pData = (const u32 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 4*3*i); const u32* pData = (const u32 *)(cached_arraybases[ARRAY_NORMAL] + (Index * arraystrides[ARRAY_NORMAL]) + 4*3*i);
((u32*)VertexManager::s_pCurBufferPointer)[0] = Common::swap32(pData[0]); ReadIndirectFloat(pData);
((u32*)VertexManager::s_pCurBufferPointer)[1] = Common::swap32(pData[1]);
((u32*)VertexManager::s_pCurBufferPointer)[2] = Common::swap32(pData[2]);
VertexManager::s_pCurBufferPointer += 12;
LOG_NORMF();
} }
} }

View File

@ -78,51 +78,35 @@ private:
static Set m_Table[NUM_NRM_TYPE][NUM_NRM_INDICES][NUM_NRM_ELEMENTS][NUM_NRM_FORMAT]; static Set m_Table[NUM_NRM_TYPE][NUM_NRM_INDICES][NUM_NRM_ELEMENTS][NUM_NRM_FORMAT];
// You can't pass signed bytes to D3D9 so we special case them to expand to signed shorts
static Set m_TableExpand16[NUM_NRM_TYPE][NUM_NRM_INDICES][NUM_NRM_ELEMENTS][NUM_NRM_FORMAT];
// direct // direct
static void LOADERDECL Normal_DirectByte(); static void LOADERDECL Normal_DirectByte();
static void LOADERDECL Normal_DirectByte_Expand16();
static void LOADERDECL Normal_DirectShort(); static void LOADERDECL Normal_DirectShort();
static void LOADERDECL Normal_DirectFloat(); static void LOADERDECL Normal_DirectFloat();
static void LOADERDECL Normal_DirectByte3(); static void LOADERDECL Normal_DirectByte3();
static void LOADERDECL Normal_DirectByte3_Expand16();
static void LOADERDECL Normal_DirectShort3(); static void LOADERDECL Normal_DirectShort3();
static void LOADERDECL Normal_DirectFloat3(); static void LOADERDECL Normal_DirectFloat3();
// index8 // index8
static void LOADERDECL Normal_Index8_Byte(); static void LOADERDECL Normal_Index8_Byte();
static void LOADERDECL Normal_Index8_Byte_Expand16();
static void LOADERDECL Normal_Index8_Short(); static void LOADERDECL Normal_Index8_Short();
static void LOADERDECL Normal_Index8_Float(); static void LOADERDECL Normal_Index8_Float();
static void LOADERDECL Normal_Index8_Byte3_Indices1(); static void LOADERDECL Normal_Index8_Byte3_Indices1();
static void LOADERDECL Normal_Index8_Byte3_Indices1_Expand16();
static void LOADERDECL Normal_Index8_Short3_Indices1(); static void LOADERDECL Normal_Index8_Short3_Indices1();
static void LOADERDECL Normal_Index8_Float3_Indices1(); static void LOADERDECL Normal_Index8_Float3_Indices1();
static void LOADERDECL Normal_Index8_Byte3_Indices3(); static void LOADERDECL Normal_Index8_Byte3_Indices3();
static void LOADERDECL Normal_Index8_Byte3_Indices3_Expand16(); static void LOADERDECL Normal_Index8_Short3_Indices3();
static void LOADERDECL Normal_Index8_Short3_Indices3();
static void LOADERDECL Normal_Index8_Float3_Indices3(); static void LOADERDECL Normal_Index8_Float3_Indices3();
// index16 // index16
static void LOADERDECL Normal_Index16_Byte(); static void LOADERDECL Normal_Index16_Byte();
static void LOADERDECL Normal_Index16_Byte_Expand16();
static void LOADERDECL Normal_Index16_Short(); static void LOADERDECL Normal_Index16_Short();
static void LOADERDECL Normal_Index16_Float(); static void LOADERDECL Normal_Index16_Float();
static void LOADERDECL Normal_Index16_Byte3_Indices1(); static void LOADERDECL Normal_Index16_Byte3_Indices1();
static void LOADERDECL Normal_Index16_Byte3_Indices1_Expand16();
static void LOADERDECL Normal_Index16_Short3_Indices1(); static void LOADERDECL Normal_Index16_Short3_Indices1();
static void LOADERDECL Normal_Index16_Float3_Indices1(); static void LOADERDECL Normal_Index16_Float3_Indices1();
static void LOADERDECL Normal_Index16_Byte3_Indices3(); static void LOADERDECL Normal_Index16_Byte3_Indices3();
static void LOADERDECL Normal_Index16_Byte3_Indices3_Expand16();
static void LOADERDECL Normal_Index16_Short3_Indices3(); static void LOADERDECL Normal_Index16_Short3_Indices3();
static void LOADERDECL Normal_Index16_Float3_Indices3(); static void LOADERDECL Normal_Index16_Float3_Indices3();
#if _M_SSE >= 0x301
static void LOADERDECL Normal_Index16_Short_SSSE3();
#endif
}; };
#endif #endif