[PJGlide64] TransformVector C is better than TransformVectorSSE.

This commit is contained in:
unknown 2015-06-18 20:29:15 -04:00
parent dcf2854309
commit 6e675e4c28
2 changed files with 0 additions and 37 deletions

View File

@ -203,7 +203,6 @@ TRANSFORMVECTOR InverseTransformVector = InverseTransformVectorC;
DOTPRODUCT DotProduct = DotProductC;
NORMALIZEVECTOR NormalizeVector = NormalizeVectorC;
extern "C" void TransformVectorSSE(float *src, float *dst, float mat[4][4]);
extern "C" void TransformVector3DNOW(float *src, float *dst, float mat[4][4]);
extern "C" void InverseTransformVector3DNOW(float *src, float *dst, float mat[4][4]);
extern "C" void MulMatricesSSE(float m1[4][4],float m2[4][4],float r[4][4]);
@ -231,7 +230,6 @@ void math_init()
if (iedx & 0x2000000) //SSE
{
MulMatrices = MulMatricesSSE;
TransformVector = TransformVectorSSE;
//InverseTransformVector = InverseTransformVectorSSE;
//NormalizeVector = NormalizeVectorSSE; /* not ready yet */
LOG("SSE detected.\n");

View File

@ -61,41 +61,6 @@ extern "C" void __declspec(naked) DetectSIMD(int func, int * iedx, int * iecx)
;
;****************************************************************/
extern "C" void __declspec(naked) TransformVectorSSE(float *src, float *dst, float mat[4][4])
{
__asm
{
push ebp
mov ebp,esp
mov ecx,[src]
mov eax,[dst]
mov edx,[mat]
movss xmm0,[ecx] ; 0 0 0 src[0]
movss xmm5,[edx] ; 0 0 0 mat[0][0]
movhps xmm5,[edx+4] ; mat[0][2] mat[0][1] 0 mat[0][0]
shufps xmm0,xmm0, 0 ; src[0] src[0] src[0] src[0]
movss xmm1,[ecx+4] ; 0 0 0 src[1]
movss xmm3,[edx+16] ; 0 0 0 mat[1][0]
movhps xmm3,[edx+20] ; mat[1][2] mat[1][1] 0 mat[1][0]
shufps xmm1,xmm1, 0 ; src[1] src[1] src[1] src[1]
mulps xmm0,xmm5 ; mat[0][2]*src[0] mat[0][1]*src[0] 0 mat[0][0]*src[0]
mulps xmm1,xmm3 ; mat[1][2]*src[1] mat[1][1]*src[1] 0 mat[1][0]*src[1]
movss xmm2,[ecx+8] ; 0 0 0 src[2]
shufps xmm2,xmm2, 0 ; src[2] src[2] src[2] src[2]
movss xmm4,[edx+32] ; 0 0 0 mat[2][0]
movhps xmm4,[edx+36] ; mat[2][2] mat[2][1] 0 mat[2][0]
addps xmm0,xmm1 ; mat[0][2]*src[0]+mat[1][2]*src[1] mat[0][1]*src[0]+mat[1][1]*src[1] 0 mat[0][0]*src[0]+mat[1][0]*src[1]
mulps xmm2,xmm4 ; mat[2][2]*src[2] mat[2][1]*src[2] 0 mat[2][0]*src[2]
addps xmm0,xmm2 ; mat[0][2]*src[0]+mat[1][2]*src[1]+mat[2][2]*src[2] mat[0][1]*src[0]+mat[1][1]*src[1]+mat[2][1]*src[2] 0 mat[0][0]*src[0]+mat[1][0]*src[1]+mat[2][0]*src[2]
movss [eax],xmm0 ; mat[0][0]*src[0]+mat[1][0]*src[1]+mat[2][0]*src[2]
movhps [eax+4],xmm0 ; mat[0][2]*src[0]+mat[1][2]*src[1]+mat[2][2]*src[2] mat[0][1]*src[0]+mat[1][1]*src[1]+mat[2][1]*src[2]
leave
ret
}
}
extern "C" void __declspec(naked) MulMatricesSSE(float m1[4][4],float m2[4][4],float r[4][4])
{
__asm