Vertex loader: optimize TexMtx_Write_Float4
TexMtx_Write_Float4 seems to rank pretty high in the profile in some geometry-heavy games like The Last Story, and the compiler-generated assembly is terrifyingly bad, so rewrite it with SSE intrinsics.
This commit is contained in:
parent
b9b3277fb5
commit
e3578683e3
|
@ -91,11 +91,17 @@ static void LOADERDECL TexMtx_Write_Float2()
|
||||||
|
|
||||||
static void LOADERDECL TexMtx_Write_Float4()
|
// Emits four floats in the order 0, 0, v, 0, where v is the next entry of
// s_curtexmtx converted to float, and post-increments s_texmtxwrite.
// The SSE path stores all four floats with one unaligned store at
// VertexManager::s_pCurBufferPointer; the fallback issues four DataWrite calls
// (presumably appending to the same buffer -- DataWrite is defined elsewhere).
static void LOADERDECL TexMtx_Write_Float4()
|
||||||
{
|
{
|
||||||
|
// NOTE(review): 0x200 presumably corresponds to SSE2 -- confirm against the _M_SSE definition.
#if _M_SSE >= 0x200
|
||||||
|
// Convert the integer to float into lane 0 of an all-zero vector: {v, 0, 0, 0}.
__m128 output = _mm_cvtsi32_ss(_mm_castsi128_ps(_mm_setzero_si128()), s_curtexmtx[s_texmtxwrite++]);
|
||||||
|
// Selector 0x45 picks source lanes (1, 1, 0, 1), so the stored vector is {0, 0, v, 0},
// the same order the scalar fallback below writes.
_mm_storeu_ps((float*)VertexManager::s_pCurBufferPointer, _mm_shuffle_ps(output, output, 0x45 /* 1, 1, 0, 1 */));
|
||||||
|
// Advance the write pointer past the four floats just stored.
VertexManager::s_pCurBufferPointer += sizeof(float) * 4;
|
||||||
|
#else
|
||||||
DataWrite(0.f);
|
// Scalar fallback: the same four values, written one at a time.
DataWrite(0.f);
|
||||||
DataWrite(0.f);
|
DataWrite(0.f);
|
||||||
DataWrite(float(s_curtexmtx[s_texmtxwrite++]));
|
DataWrite(float(s_curtexmtx[s_texmtxwrite++]));
|
||||||
// Just to fill out with 0.
|
// Just to fill out with 0.
|
||||||
DataWrite(0.f);
|
DataWrite(0.f);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
VertexLoader::VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr)
|
VertexLoader::VertexLoader(const TVtxDesc &vtx_desc, const VAT &vtx_attr)
|
||||||
|
|
Loading…
Reference in New Issue