Rewrite MulMatricesC in a more auto-vectorizable way.

c5998a531b
This commit is contained in:
zilmar 2015-10-10 23:23:26 +11:00
parent f2d1097014
commit 1dd25b2063
1 changed file with 32 additions and 8 deletions

View File

@ -190,16 +190,40 @@ void InverseTransformVectorC (float *src, float *dst, float mat[4][4])
void MulMatricesC(float m1[4][4],float m2[4][4],float r[4][4]) void MulMatricesC(float m1[4][4],float m2[4][4],float r[4][4])
{ {
for (int i=0; i<4; i++) float row[4][4];
{ register unsigned int i, j;
for (int j=0; j<4; j++)
for (i = 0; i < 4; i++)
for (j = 0; j < 4; j++)
row[i][j] = m2[i][j];
for (i = 0; i < 4; i++)
{ {
r[i][j] = m1[i][0] * m2[0][j] + // auto-vectorizable algorithm
m1[i][1] * m2[1][j] + // vectorized loop style, such that compilers can
m1[i][2] * m2[2][j] + // easily create optimized SSE instructions.
m1[i][3] * m2[3][j]; float leftrow[4];
float summand[4][4];
for (j = 0; j < 4; j++)
leftrow[j] = m1[i][j];
for (j = 0; j < 4; j++)
summand[0][j] = leftrow[0] * row[0][j];
for (j = 0; j < 4; j++)
summand[1][j] = leftrow[1] * row[1][j];
for (j = 0; j < 4; j++)
summand[2][j] = leftrow[2] * row[2][j];
for (j = 0; j < 4; j++)
summand[3][j] = leftrow[3] * row[3][j];
for (j = 0; j < 4; j++)
r[i][j] =
summand[0][j]
+ summand[1][j]
+ summand[2][j]
+ summand[3][j]
;
} }
}
} }
// 2008.03.29 H.Morii - added SSE 3DNOW! 3x3 1x3 matrix multiplication // 2008.03.29 H.Morii - added SSE 3DNOW! 3x3 1x3 matrix multiplication