- Add many of NHerve's improvements into OGLRender because I was trying to fix all the 3d issues
- Track polycount better. Still worthless: at the very least, it doesn't account for clipping and culling - Carry w=1 from vertex() through the pipeline (this will be necessary for software 3D rendering) - Make GPU matrix mult and load commands clear out unused rows and cols to identity correctly - Make matrix 4x4 multiply routines use the W-coordinate.
This commit is contained in:
parent
7d2fc8964e
commit
5278185e73
|
@ -27,6 +27,12 @@
|
|||
- Some fixes in 3D core OGL (fixed textures) [CrazyMax]
|
||||
- Added texture caching (speedup 3D core) [CrazyMax]
|
||||
- Fix clear depth (e.g. Castlevania no longer flips) [NHerve]
|
||||
- Make matrix 4x4 multiply routines use W-coordinate. [zeromus]
|
||||
- Make GPU matrix mult and load commands clear out unused rows and cols to identity correctly;
|
||||
carry w=1 from vertex() through pipeline (this will be necessary for software 3d rendering) [zeromus]
|
||||
- Track polycount better. Still worthless: at the very least, it doesn't account for clipping and culling [zeromus]
|
||||
- Fix errors in matrix operations regarding projection mode and pos-vector mode [zeromus]
|
||||
- Fix error in command unpacking which caused some display lists to totally blow up [zeromus]
|
||||
|
||||
0.7.3 -> 0.8
|
||||
Cocoa:
|
||||
|
|
|
@ -1,228 +1,255 @@
|
|||
/*
|
||||
Copyright (C) 2006-2007 shash
|
||||
|
||||
This file is part of DeSmuME
|
||||
|
||||
DeSmuME is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
DeSmuME is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with DeSmuME; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include "matrix.h"
|
||||
|
||||
void MatrixInit (float *matrix)
|
||||
{
|
||||
memset (matrix, 0, sizeof(float)*16);
|
||||
matrix[0] = matrix[5] = matrix[10] = matrix[15] = 1.f;
|
||||
}
|
||||
|
||||
#ifdef SSE2
|
||||
void __fastcall MatrixIdentity (float *matrix) //============== TODO
|
||||
{
|
||||
memset (matrix, 0, sizeof(float)*16);
|
||||
matrix[0] = matrix[5] = matrix[10] = matrix[15] = 1.f;
|
||||
}
|
||||
|
||||
float __fastcall MatrixGetMultipliedIndex (int index, float *matrix, float *rightMatrix)
|
||||
{
|
||||
int iMod = index%4, iDiv = (index>>2)<<2;
|
||||
|
||||
return (matrix[iMod ]*rightMatrix[iDiv ])+(matrix[iMod+ 4]*rightMatrix[iDiv+1])+
|
||||
(matrix[iMod+8]*rightMatrix[iDiv+2])+(matrix[iMod+12]*rightMatrix[iDiv+3]);
|
||||
}
|
||||
|
||||
void __fastcall MatrixSet (float *matrix, int x, int y, float value) // TODO
|
||||
{
|
||||
matrix [x+(y<<2)] = value;
|
||||
}
|
||||
|
||||
void __fastcall MatrixCopy (float *matrixDST, float *matrixSRC)
|
||||
{
|
||||
memcpy (matrixDST, matrixSRC, sizeof(float)*16);
|
||||
}
|
||||
#else
|
||||
void MatrixMultVec4x4 (float *matrix, float *vecPtr)
|
||||
{
|
||||
float x = vecPtr[0];
|
||||
float y = vecPtr[1];
|
||||
float z = vecPtr[2];
|
||||
|
||||
vecPtr[0] = x * matrix[0] + y * matrix[4] + z * matrix[ 8] + matrix[12];
|
||||
vecPtr[1] = x * matrix[1] + y * matrix[5] + z * matrix[ 9] + matrix[13];
|
||||
vecPtr[2] = x * matrix[2] + y * matrix[6] + z * matrix[10] + matrix[14];
|
||||
}
|
||||
|
||||
void MatrixMultVec3x3 (float *matrix, float *vecPtr)
|
||||
{
|
||||
float x = vecPtr[0];
|
||||
float y = vecPtr[1];
|
||||
float z = vecPtr[2];
|
||||
|
||||
vecPtr[0] = x * matrix[0] + y * matrix[4] + z * matrix[ 8];
|
||||
vecPtr[1] = x * matrix[1] + y * matrix[5] + z * matrix[ 9];
|
||||
vecPtr[2] = x * matrix[2] + y * matrix[6] + z * matrix[10];
|
||||
}
|
||||
|
||||
void MatrixIdentity (float *matrix)
|
||||
{
|
||||
memset (matrix, 0, sizeof(float)*16);
|
||||
|
||||
matrix[0] = matrix[5] = matrix[10] = matrix[15] = 1.f;
|
||||
}
|
||||
|
||||
void MatrixMultiply (float *matrix, float *rightMatrix)
|
||||
{
|
||||
float tmpMatrix[16];
|
||||
|
||||
tmpMatrix[0] = (matrix[0]*rightMatrix[0])+(matrix[4]*rightMatrix[1])+(matrix[8]*rightMatrix[2])+(matrix[12]*rightMatrix[3]);
|
||||
tmpMatrix[1] = (matrix[1]*rightMatrix[0])+(matrix[5]*rightMatrix[1])+(matrix[9]*rightMatrix[2])+(matrix[13]*rightMatrix[3]);
|
||||
tmpMatrix[2] = (matrix[2]*rightMatrix[0])+(matrix[6]*rightMatrix[1])+(matrix[10]*rightMatrix[2])+(matrix[14]*rightMatrix[3]);
|
||||
tmpMatrix[3] = (matrix[3]*rightMatrix[0])+(matrix[7]*rightMatrix[1])+(matrix[11]*rightMatrix[2])+(matrix[15]*rightMatrix[3]);
|
||||
|
||||
tmpMatrix[4] = (matrix[0]*rightMatrix[4])+(matrix[4]*rightMatrix[5])+(matrix[8]*rightMatrix[6])+(matrix[12]*rightMatrix[7]);
|
||||
tmpMatrix[5] = (matrix[1]*rightMatrix[4])+(matrix[5]*rightMatrix[5])+(matrix[9]*rightMatrix[6])+(matrix[13]*rightMatrix[7]);
|
||||
tmpMatrix[6] = (matrix[2]*rightMatrix[4])+(matrix[6]*rightMatrix[5])+(matrix[10]*rightMatrix[6])+(matrix[14]*rightMatrix[7]);
|
||||
tmpMatrix[7] = (matrix[3]*rightMatrix[4])+(matrix[7]*rightMatrix[5])+(matrix[11]*rightMatrix[6])+(matrix[15]*rightMatrix[7]);
|
||||
|
||||
tmpMatrix[8] = (matrix[0]*rightMatrix[8])+(matrix[4]*rightMatrix[9])+(matrix[8]*rightMatrix[10])+(matrix[12]*rightMatrix[11]);
|
||||
tmpMatrix[9] = (matrix[1]*rightMatrix[8])+(matrix[5]*rightMatrix[9])+(matrix[9]*rightMatrix[10])+(matrix[13]*rightMatrix[11]);
|
||||
tmpMatrix[10] = (matrix[2]*rightMatrix[8])+(matrix[6]*rightMatrix[9])+(matrix[10]*rightMatrix[10])+(matrix[14]*rightMatrix[11]);
|
||||
tmpMatrix[11] = (matrix[3]*rightMatrix[8])+(matrix[7]*rightMatrix[9])+(matrix[11]*rightMatrix[10])+(matrix[15]*rightMatrix[11]);
|
||||
|
||||
tmpMatrix[12] = (matrix[0]*rightMatrix[12])+(matrix[4]*rightMatrix[13])+(matrix[8]*rightMatrix[14])+(matrix[12]*rightMatrix[15]);
|
||||
tmpMatrix[13] = (matrix[1]*rightMatrix[12])+(matrix[5]*rightMatrix[13])+(matrix[9]*rightMatrix[14])+(matrix[13]*rightMatrix[15]);
|
||||
tmpMatrix[14] = (matrix[2]*rightMatrix[12])+(matrix[6]*rightMatrix[13])+(matrix[10]*rightMatrix[14])+(matrix[14]*rightMatrix[15]);
|
||||
tmpMatrix[15] = (matrix[3]*rightMatrix[12])+(matrix[7]*rightMatrix[13])+(matrix[11]*rightMatrix[14])+(matrix[15]*rightMatrix[15]);
|
||||
|
||||
memcpy (matrix, tmpMatrix, sizeof(float)*16);
|
||||
}
|
||||
|
||||
float MatrixGetMultipliedIndex (int index, float *matrix, float *rightMatrix)
|
||||
{
|
||||
int iMod = index%4, iDiv = (index>>2)<<2;
|
||||
|
||||
return (matrix[iMod ]*rightMatrix[iDiv ])+(matrix[iMod+ 4]*rightMatrix[iDiv+1])+
|
||||
(matrix[iMod+8]*rightMatrix[iDiv+2])+(matrix[iMod+12]*rightMatrix[iDiv+3]);
|
||||
}
|
||||
|
||||
void MatrixSet (float *matrix, int x, int y, float value)
|
||||
{
|
||||
matrix [x+(y<<2)] = value;
|
||||
}
|
||||
|
||||
void MatrixCopy (float *matrixDST, float *matrixSRC)
|
||||
{
|
||||
memcpy (matrixDST, matrixSRC, sizeof(float)*16);
|
||||
}
|
||||
|
||||
void MatrixTranslate (float *matrix, float *ptr)
|
||||
{
|
||||
matrix[12] += (matrix[0]*ptr[0])+(matrix[4]*ptr[1])+(matrix[ 8]*ptr[2]);
|
||||
matrix[13] += (matrix[1]*ptr[0])+(matrix[5]*ptr[1])+(matrix[ 9]*ptr[2]);
|
||||
matrix[14] += (matrix[2]*ptr[0])+(matrix[6]*ptr[1])+(matrix[10]*ptr[2]);
|
||||
matrix[15] += (matrix[3]*ptr[0])+(matrix[7]*ptr[1])+(matrix[11]*ptr[2]);
|
||||
}
|
||||
|
||||
void MatrixScale (float *matrix, float *ptr)
|
||||
{
|
||||
matrix[0] *= ptr[0];
|
||||
matrix[1] *= ptr[0];
|
||||
matrix[2] *= ptr[0];
|
||||
matrix[3] *= ptr[0];
|
||||
|
||||
matrix[4] *= ptr[1];
|
||||
matrix[5] *= ptr[1];
|
||||
matrix[6] *= ptr[1];
|
||||
matrix[7] *= ptr[1];
|
||||
|
||||
matrix[8] *= ptr[2];
|
||||
matrix[9] *= ptr[2];
|
||||
matrix[10] *= ptr[2];
|
||||
matrix[11] *= ptr[2];
|
||||
}
|
||||
#endif
|
||||
//-----------------------------------------
|
||||
|
||||
void MatrixStackInit (MatrixStack *stack)
|
||||
{
|
||||
stack->matrix = NULL;
|
||||
stack->position = 0;
|
||||
stack->size = 0;
|
||||
}
|
||||
|
||||
void MatrixStackSetMaxSize (MatrixStack *stack, int size)
|
||||
{
|
||||
int i = 0;
|
||||
|
||||
stack->size = size;
|
||||
|
||||
if (stack->matrix == NULL)
|
||||
{
|
||||
stack->matrix = (float*) malloc (stack->size*16*sizeof(float));
|
||||
}
|
||||
else
|
||||
{
|
||||
free (stack->matrix);
|
||||
stack->matrix = (float*) malloc (stack->size*16*sizeof(float));
|
||||
}
|
||||
|
||||
for (i = 0; i < stack->size; i++)
|
||||
{
|
||||
MatrixInit (&stack->matrix[i*16]);
|
||||
}
|
||||
|
||||
stack->size--;
|
||||
}
|
||||
|
||||
|
||||
void MatrixStackSetStackPosition (MatrixStack *stack, int pos)
|
||||
{
|
||||
stack->position += pos;
|
||||
|
||||
if (stack->position < 0)
|
||||
stack->position = 0;
|
||||
else if (stack->position > stack->size)
|
||||
stack->position = stack->size;
|
||||
}
|
||||
|
||||
void MatrixStackPushMatrix (MatrixStack *stack, float *ptr)
|
||||
{
|
||||
MatrixCopy (&stack->matrix[stack->position*16], ptr);
|
||||
|
||||
MatrixStackSetStackPosition (stack, 1);
|
||||
}
|
||||
|
||||
float * MatrixStackPopMatrix (MatrixStack *stack, int size)
|
||||
{
|
||||
MatrixStackSetStackPosition(stack, -size);
|
||||
|
||||
return &stack->matrix[stack->position*16];
|
||||
}
|
||||
|
||||
float * MatrixStackGetPos (MatrixStack *stack, int pos)
|
||||
{
|
||||
return &stack->matrix[pos*16];
|
||||
}
|
||||
|
||||
float * MatrixStackGet (MatrixStack *stack)
|
||||
{
|
||||
return &stack->matrix[stack->position*16];
|
||||
}
|
||||
|
||||
void MatrixStackLoadMatrix (MatrixStack *stack, int pos, float *ptr)
|
||||
{
|
||||
MatrixCopy (&stack->matrix[pos*16], ptr);
|
||||
}
|
||||
/*
|
||||
Copyright (C) 2006-2007 shash
|
||||
|
||||
This file is part of DeSmuME
|
||||
|
||||
DeSmuME is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
DeSmuME is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with DeSmuME; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include "matrix.h"
|
||||
|
||||
/*
 * Overwrite the 16 floats at `matrix` with the identity pattern:
 * 1 at indices 0, 5, 10 and 15 (the diagonal), 0 everywhere else.
 */
void	MatrixInit  (float *matrix)
{
	int idx;

	/* Diagonal entries of a 4x4 stored in 16 floats are exactly the multiples of 5. */
	for (idx = 0; idx < 16; idx++)
		matrix[idx] = (idx % 5 == 0) ? 1.f : 0.f;
}
|
||||
|
||||
#ifdef SSE2
|
||||
/*
 * SSE2 build: overwrite the 16 floats at `matrix` with the identity pattern
 * (1 on the diagonal, 0 elsewhere).
 */
void __fastcall MatrixIdentity	(float *matrix)		// TODO (marker carried over from the original; no detail given)
{
	static const float identity[16] =
	{
		1.f, 0.f, 0.f, 0.f,
		0.f, 1.f, 0.f, 0.f,
		0.f, 0.f, 1.f, 0.f,
		0.f, 0.f, 0.f, 1.f
	};

	memcpy (matrix, identity, sizeof(identity));
}
|
||||
|
||||
/*
 * SSE2 build: compute one element of a 4x4 product without forming the
 * whole matrix.  For element `index` the result is
 *   sum over k = 0..3 of matrix[index%4 + 4*k] * rightMatrix[(index/4)*4 + k].
 */
float __fastcall MatrixGetMultipliedIndex (int index, float *matrix, float *rightMatrix)
{
	const int    lane  = index%4;
	const int    base  = (index>>2)<<2;
	const float *left  = matrix + lane;
	const float *right = rightMatrix + base;

	/* Single expression so the products are added in the original left-to-right order. */
	return (left[0]*right[0])+(left[4]*right[1])+(left[8]*right[2])+(left[12]*right[3]);
}
|
||||
|
||||
/*
 * SSE2 build: store `value` at flat index x + 4*y of the 16-float matrix.
 * No range checking is performed on x or y.
 */
void __fastcall MatrixSet (float *matrix, int x, int y, float value)	// TODO (marker carried over from the original; no detail given)
{
	const int offset = x+(y<<2);

	matrix[offset] = value;
}
|
||||
|
||||
/*
 * SSE2 build: copy 16 floats from matrixSRC to matrixDST.
 * Implemented with memcpy, so the two buffers must not overlap.
 */
void __fastcall MatrixCopy (float *matrixDST, float *matrixSRC)
{
	const size_t bytes = 16 * sizeof(float);

	memcpy (matrixDST, matrixSRC, bytes);
}
|
||||
#else
|
||||
/*
 * Transform the 4-component vector at `vecPtr` in place by the 16-float
 * `matrix`.  Output component i is
 *   x*matrix[i] + y*matrix[i+4] + z*matrix[i+8] + w*matrix[i+12],
 * where x, y, z, w are the vector's values on entry.
 */
void MatrixMultVec4x4 (float *matrix, float *vecPtr)
{
	float in[4];
	int   i;

	/* Snapshot the input first: the results are written back over it. */
	in[0] = vecPtr[0];
	in[1] = vecPtr[1];
	in[2] = vecPtr[2];
	in[3] = vecPtr[3];

	for (i = 0; i < 4; i++)
		vecPtr[i] = in[0] * matrix[i] + in[1] * matrix[i+4] + in[2] * matrix[i+8] + in[3] * matrix[i+12];
}
|
||||
|
||||
/*
 * Transform the first three components of `vecPtr` in place using only the
 * entries matrix[i], matrix[i+4] and matrix[i+8] for i = 0..2.
 * matrix[12..15] are not read and vecPtr[3] is not touched.
 */
void MatrixMultVec3x3 (float *matrix, float *vecPtr)
{
	float in[3];
	int   i;

	/* Snapshot the input first: the results are written back over it. */
	in[0] = vecPtr[0];
	in[1] = vecPtr[1];
	in[2] = vecPtr[2];

	for (i = 0; i < 3; i++)
		vecPtr[i] = in[0] * matrix[i] + in[1] * matrix[i+4] + in[2] * matrix[i+8];
}
|
||||
|
||||
/*
 * Overwrite the 16 floats at `matrix` with the identity pattern
 * (1 on the diagonal, 0 elsewhere).
 */
void MatrixIdentity	(float *matrix)
{
	static const float identity[16] =
	{
		1.f, 0.f, 0.f, 0.f,
		0.f, 1.f, 0.f, 0.f,
		0.f, 0.f, 1.f, 0.f,
		0.f, 0.f, 0.f, 1.f
	};

	memcpy (matrix, identity, sizeof(identity));
}
|
||||
|
||||
/*
 * Multiply `matrix` by `rightMatrix` and store the product back into
 * `matrix`.  Element (4*col + row) of the result is
 *   sum over k = 0..3 of matrix[row + 4*k] * rightMatrix[4*col + k].
 * The product is built in a temporary, so `matrix` is only overwritten
 * once every element has been computed.  `rightMatrix` is not modified.
 */
void MatrixMultiply (float *matrix, float *rightMatrix)
{
	float product[16];
	int   col, row;

	for (col = 0; col < 4; col++)
	{
		const float *rhs = rightMatrix + col*4;

		for (row = 0; row < 4; row++)
		{
			/* Same left-to-right summation order as the unrolled form. */
			product[col*4 + row] = (matrix[row]*rhs[0])+(matrix[row+4]*rhs[1])+(matrix[row+8]*rhs[2])+(matrix[row+12]*rhs[3]);
		}
	}

	memcpy (matrix, product, sizeof(float)*16);
}
|
||||
|
||||
/*
 * Compute a single element of the product that MatrixMultiply would
 * produce, without forming the whole matrix.  For element `index` the
 * result is
 *   sum over k = 0..3 of matrix[index%4 + 4*k] * rightMatrix[(index/4)*4 + k].
 * Neither input is modified.
 */
float MatrixGetMultipliedIndex (int index, float *matrix, float *rightMatrix)
{
	const int    lane  = index%4;
	const int    base  = (index>>2)<<2;
	const float *left  = matrix + lane;
	const float *right = rightMatrix + base;

	/* Single expression so the products are added in the original left-to-right order. */
	return (left[0]*right[0])+(left[4]*right[1])+(left[8]*right[2])+(left[12]*right[3]);
}
|
||||
|
||||
/*
 * Store `value` at flat index x + 4*y of the 16-float matrix.
 * No range checking is performed on x or y.
 */
void MatrixSet (float *matrix, int x, int y, float value)
{
	const int offset = x+(y<<2);

	matrix[offset] = value;
}
|
||||
|
||||
/*
 * Transpose the 4x4 matrix in place by swapping each element above the
 * diagonal with its mirror below it.  Index layout before and after:
 *
 *   0 1 2 3        0 4 8 C
 *   4 5 6 7   ->   1 5 9 D
 *   8 9 A B        2 6 A E
 *   C D E F        3 7 B F
 */
void MatrixTranspose(float *matrix)
{
	int row, col;

	for (row = 0; row < 3; row++)
	{
		for (col = row + 1; col < 4; col++)
		{
			const int   upper = row*4 + col;
			const int   lower = col*4 + row;
			const float held  = matrix[upper];

			matrix[upper] = matrix[lower];
			matrix[lower] = held;
		}
	}
}
|
||||
|
||||
/*
 * Copy 16 floats from matrixSRC to matrixDST.
 * Implemented with memcpy, so the two buffers must not overlap.
 */
void MatrixCopy (float *matrixDST, float *matrixSRC)
{
	const size_t bytes = 16 * sizeof(float);

	memcpy (matrixDST, matrixSRC, bytes);
}
|
||||
|
||||
/*
 * Apply a translation to `matrix` in place.  For i = 0..3,
 *   matrix[12+i] += matrix[i]*ptr[0] + matrix[4+i]*ptr[1] + matrix[8+i]*ptr[2].
 * Only ptr[0..2] are read; matrix[0..11] are left unchanged.
 */
void MatrixTranslate	(float *matrix, float *ptr)
{
	int i;

	for (i = 0; i < 4; i++)
		matrix[12+i] += (matrix[i]*ptr[0])+(matrix[4+i]*ptr[1])+(matrix[8+i]*ptr[2]);
}
|
||||
|
||||
/*
 * Scale `matrix` in place: entries 0..3 are multiplied by ptr[0],
 * entries 4..7 by ptr[1] and entries 8..11 by ptr[2].
 * Entries 12..15 are left unchanged.
 */
void MatrixScale (float *matrix, float *ptr)
{
	int group, lane;

	for (group = 0; group < 3; group++)
	{
		for (lane = 0; lane < 4; lane++)
		{
			/* ptr[group] is re-read on every store, exactly as the unrolled form did. */
			matrix[group*4 + lane] *= ptr[group];
		}
	}
}
|
||||
#endif
|
||||
//-----------------------------------------
|
||||
|
||||
/*
 * Put a MatrixStack into its empty state: no buffer, position 0, size 0.
 * Nothing is freed here; any previously held buffer pointer is simply overwritten.
 */
void MatrixStackInit (MatrixStack *stack)
{
	stack->size     = 0;
	stack->position = 0;
	stack->matrix   = NULL;
}
|
||||
|
||||
/*
 * (Re)allocate the stack so it holds `size` matrices of 16 floats each and
 * initialise every one of them with MatrixInit.
 *
 * On success stack->size is set to size-1, i.e. the highest valid position
 * (MatrixStackSetStackPosition clamps against that value).  Any buffer the
 * stack held before is released and its contents are lost.
 *
 * If `size` is not positive, or the allocation fails, the stack is left
 * empty (matrix == NULL, size == 0, position == 0) instead of writing
 * through a NULL pointer.
 */
void MatrixStackSetMaxSize (MatrixStack *stack, int size)
{
	int i;

	/* free(NULL) is a no-op, so the previous buffer can be released unconditionally. */
	free (stack->matrix);
	stack->matrix = NULL;
	stack->size = 0;

	if (size <= 0)
	{
		stack->position = 0;
		return;
	}

	stack->matrix = (float*) malloc ((size_t)size*16*sizeof(float));

	if (stack->matrix == NULL)
	{
		/* Out of memory: keep the stack in its empty state. */
		stack->position = 0;
		return;
	}

	for (i = 0; i < size; i++)
	{
		MatrixInit (&stack->matrix[i*16]);
	}

	/* Stored as the highest usable index rather than the element count. */
	stack->size = size - 1;
}
|
||||
|
||||
|
||||
/*
 * Move the stack position by `pos` (a signed delta, not an absolute index)
 * and clamp the result: values below 0 become 0, otherwise values above
 * stack->size become stack->size.
 */
void MatrixStackSetStackPosition (MatrixStack *stack, int pos)
{
	int target = stack->position + pos;

	if (target < 0)
		target = 0;
	else if (target > stack->size)
		target = stack->size;

	stack->position = target;
}
|
||||
|
||||
/*
 * Copy the matrix at `ptr` into the slot at the current stack position,
 * then advance the position by one (clamped by MatrixStackSetStackPosition).
 */
void MatrixStackPushMatrix (MatrixStack *stack, float *ptr)
{
	float *slot = stack->matrix + stack->position*16;

	MatrixCopy (slot, ptr);
	MatrixStackSetStackPosition (stack, 1);
}
|
||||
|
||||
/*
 * Move the stack position back by `size` entries (clamped by
 * MatrixStackSetStackPosition) and return a pointer to the matrix stored
 * at the new position.  The pointer refers to the stack's own buffer.
 */
float * MatrixStackPopMatrix (MatrixStack *stack, int size)
{
	float *top;

	MatrixStackSetStackPosition (stack, -size);

	top = stack->matrix + stack->position*16;
	return top;
}
|
||||
|
||||
/*
 * Return a pointer to the matrix stored in slot `pos` of the stack's
 * buffer.  `pos` is used as given; no range check is performed.
 */
float * MatrixStackGetPos (MatrixStack *stack, int pos)
{
	return stack->matrix + pos*16;
}
|
||||
|
||||
/*
 * Return a pointer to the matrix stored at the stack's current position.
 * The pointer refers to the stack's own buffer.
 */
float * MatrixStackGet (MatrixStack *stack)
{
	const int slot = stack->position;

	return stack->matrix + slot*16;
}
|
||||
|
||||
/*
 * Overwrite slot `pos` of the stack's buffer with the matrix at `ptr`.
 * The stack position is not changed and `pos` is not range-checked.
 */
void MatrixStackLoadMatrix (MatrixStack *stack, int pos, float *ptr)
{
	float *slot = stack->matrix + pos*16;

	MatrixCopy (slot, ptr);
}
|
||||
|
|
|
@ -1,79 +1,81 @@
|
|||
/*
|
||||
Copyright (C) 2006-2007 shash
|
||||
|
||||
This file is part of DeSmuME
|
||||
|
||||
DeSmuME is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
DeSmuME is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with DeSmuME; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
|
||||
#ifndef MATRIX_H
|
||||
#define MATRIX_H
|
||||
|
||||
#include "types.h"
|
||||
|
||||
#ifdef SSE2
|
||||
#include <xmmintrin.h>
|
||||
#include <emmintrin.h>
|
||||
//typedef __declspec(align(16)) float gMatrix[4][4];
|
||||
//typedef float gMatrix[4][4];
|
||||
typedef float gMatrix[16];
|
||||
#endif
|
||||
|
||||
typedef struct MatrixStack
|
||||
{
|
||||
#ifdef SSE2
|
||||
//gMatrix *matrix;
|
||||
float *matrix;
|
||||
#else
|
||||
float *matrix;
|
||||
#endif
|
||||
int position;
|
||||
int size;
|
||||
} MatrixStack;
|
||||
|
||||
void MatrixInit (float *matrix);
|
||||
#ifdef SSE2
|
||||
extern void __fastcall MatrixMultVec3x3 (const gMatrix matrix, const gMatrix vecPtr);
|
||||
extern void __fastcall MatrixMultVec4x4 (const gMatrix matrix, const gMatrix vecPtr);
|
||||
void __fastcall MatrixIdentity (float *matrix);
|
||||
extern void __fastcall MatrixMultiply (const gMatrix matrix, const gMatrix rightMatrix);
|
||||
float __fastcall MatrixGetMultipliedIndex (int index, float *matrix, float *rightMatrix);
|
||||
void __fastcall MatrixSet (float *matrix, int x, int y, float value);
|
||||
void __fastcall MatrixCopy (const gMatrix matrixDST, const gMatrix matrixSRC);
|
||||
extern void __fastcall MatrixTranslate (float *matrix, float *ptr);
|
||||
extern void __fastcall MatrixScale (const gMatrix matrix, const gMatrix ptr);
|
||||
void __fastcall MatrixScale (const gMatrix matrix, const gMatrix ptr);
|
||||
#else
|
||||
void MatrixMultVec3x3 (float *matrix, float *vecPtr);
|
||||
void MatrixMultVec4x4 (float *matrix, float *vecPtr);
|
||||
void MatrixIdentity (float *matrix);
|
||||
void MatrixMultiply (float *matrix, float *rightMatrix);
|
||||
float MatrixGetMultipliedIndex(int index, float *matrix, float *rightMatrix);
|
||||
void MatrixSet (float *matrix, int x, int y, float value);
|
||||
void MatrixCopy (float *matrixDST, float *matrixSRC);
|
||||
void MatrixTranslate (float *matrix, float *ptr);
|
||||
void MatrixScale (float *matrix, float *ptr);
|
||||
#endif
|
||||
|
||||
void MatrixStackInit (MatrixStack *stack);
|
||||
void MatrixStackSetMaxSize (MatrixStack *stack, int size);
|
||||
void MatrixStackSetStackPosition (MatrixStack *stack, int pos);
|
||||
void MatrixStackPushMatrix (MatrixStack *stack, float *ptr);
|
||||
float* MatrixStackPopMatrix (MatrixStack *stack, int size);
|
||||
float* MatrixStackGetPos (MatrixStack *stack, int pos);
|
||||
float* MatrixStackGet (MatrixStack *stack);
|
||||
void MatrixStackLoadMatrix (MatrixStack *stack, int pos, float *ptr);
|
||||
|
||||
#endif
|
||||
/*
|
||||
Copyright (C) 2006-2007 shash
|
||||
|
||||
This file is part of DeSmuME
|
||||
|
||||
DeSmuME is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
DeSmuME is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with DeSmuME; if not, write to the Free Software
|
||||
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
*/
|
||||
|
||||
#ifndef MATRIX_H
|
||||
#define MATRIX_H
|
||||
|
||||
#include "types.h"
|
||||
|
||||
#ifdef SSE2
|
||||
#include <xmmintrin.h>
|
||||
#include <emmintrin.h>
|
||||
//typedef __declspec(align(16)) float gMatrix[4][4];
|
||||
//typedef float gMatrix[4][4];
|
||||
typedef float gMatrix[16];
|
||||
#endif
|
||||
|
||||
/*
 * A stack of matrices kept back-to-back in one float buffer, 16 floats per
 * entry.  The SSE2 and non-SSE2 builds use the same field layout.
 */
typedef struct MatrixStack
{
	//gMatrix	*matrix;	/* disabled SSE2 alternative, kept from the original */
	float	*matrix;		/* buffer of entries; NULL after MatrixStackInit */
	int		position;		/* index of the current entry */
	int		size;			/* upper bound that position is clamped to */
} MatrixStack;
|
||||
|
||||
void MatrixInit (float *matrix);
|
||||
#ifdef SSE2
|
||||
extern void __fastcall MatrixMultVec3x3 (const gMatrix matrix, const gMatrix vecPtr);
|
||||
extern void __fastcall MatrixMultVec4x4 (const gMatrix matrix, const gMatrix vecPtr);
|
||||
void __fastcall MatrixIdentity (float *matrix);
|
||||
extern void __fastcall MatrixMultiply (const gMatrix matrix, const gMatrix rightMatrix);
|
||||
float __fastcall MatrixGetMultipliedIndex (int index, float *matrix, float *rightMatrix);
|
||||
void __fastcall MatrixSet (float *matrix, int x, int y, float value);
|
||||
void __fastcall MatrixCopy (const gMatrix matrixDST, const gMatrix matrixSRC);
|
||||
extern void __fastcall MatrixTranslate (float *matrix, float *ptr);
|
||||
extern void __fastcall MatrixScale (const gMatrix matrix, const gMatrix ptr);
|
||||
void __fastcall MatrixScale (const gMatrix matrix, const gMatrix ptr);
|
||||
#else
|
||||
void MatrixMultVec3x3 (float *matrix, float *vecPtr);
|
||||
void MatrixMultVec4x4 (float *matrix, float *vecPtr);
|
||||
void MatrixIdentity (float *matrix);
|
||||
void MatrixMultiply (float *matrix, float *rightMatrix);
|
||||
float MatrixGetMultipliedIndex(int index, float *matrix, float *rightMatrix);
|
||||
void MatrixSet (float *matrix, int x, int y, float value);
|
||||
void MatrixCopy (float *matrixDST, float *matrixSRC);
|
||||
void MatrixTranslate (float *matrix, float *ptr);
|
||||
void MatrixScale (float *matrix, float *ptr);
|
||||
#endif
|
||||
|
||||
void MatrixTranspose(float *matrix);
|
||||
|
||||
void MatrixStackInit (MatrixStack *stack);
|
||||
void MatrixStackSetMaxSize (MatrixStack *stack, int size);
|
||||
void MatrixStackSetStackPosition (MatrixStack *stack, int pos);
|
||||
void MatrixStackPushMatrix (MatrixStack *stack, float *ptr);
|
||||
float* MatrixStackPopMatrix (MatrixStack *stack, int size);
|
||||
float* MatrixStackGetPos (MatrixStack *stack, int pos);
|
||||
float* MatrixStackGet (MatrixStack *stack);
|
||||
void MatrixStackLoadMatrix (MatrixStack *stack, int pos, float *ptr);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -1,178 +1,180 @@
|
|||
;
|
||||
; Copyright (C) 2006 yopyop
|
||||
; Copyright (C) 2008 CrazyMax
|
||||
;
|
||||
; This file is part of DeSmuME
|
||||
;
|
||||
; DeSmuME is free software; you can redistribute it and/or modify
|
||||
; it under the terms of the GNU General Public License as published by
|
||||
; the Free Software Foundation; either version 2 of the License, or
|
||||
; (at your option) any later version.
|
||||
;
|
||||
; DeSmuME is distributed in the hope that it will be useful,
|
||||
; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
; GNU General Public License for more details.
|
||||
;
|
||||
; You should have received a copy of the GNU General Public License
|
||||
; along with DeSmuME; if not, write to the Free Software
|
||||
; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
TITLE matrix_sse2-x64.asm
|
||||
.code
|
||||
|
||||
MatrixMultVec4x4 PROC PUBLIC
|
||||
movaps xmm0, XMMWORD PTR [rcx]
|
||||
movaps xmm1, XMMWORD PTR [rcx+16]
|
||||
movaps xmm2, XMMWORD PTR [rcx+32]
|
||||
movaps xmm3, XMMWORD PTR [rcx+48]
|
||||
movaps xmm4, XMMWORD PTR [rdx]
|
||||
movaps xmm5, xmm4
|
||||
movaps xmm6, xmm4
|
||||
movaps xmm7, xmm4
|
||||
shufps xmm4, xmm4, 00000000b
|
||||
shufps xmm5, xmm5, 01010101b
|
||||
shufps xmm6, xmm6, 10101010b
|
||||
mulps xmm4, xmm0
|
||||
mulps xmm5, xmm1
|
||||
mulps xmm6, xmm2
|
||||
addps xmm4, xmm5
|
||||
addps xmm4, xmm6
|
||||
addps xmm4, xmm3
|
||||
movaps XMMWORD PTR [rdx], xmm4
|
||||
ret 0
|
||||
MatrixMultVec4x4 ENDP
|
||||
|
||||
MatrixMultVec3x3 PROC PUBLIC
|
||||
movaps xmm0, XMMWORD PTR [rcx]
|
||||
movaps xmm1, XMMWORD PTR [rcx+16]
|
||||
movaps xmm2, XMMWORD PTR [rcx+32]
|
||||
movaps xmm4, XMMWORD PTR [rdx]
|
||||
movaps xmm5, xmm4
|
||||
movaps xmm6, xmm4
|
||||
movaps xmm7, xmm4
|
||||
shufps xmm4, xmm4, 00000000b
|
||||
shufps xmm5, xmm5, 01010101b
|
||||
shufps xmm6, xmm6, 10101010b
|
||||
mulps xmm4, xmm0
|
||||
mulps xmm5, xmm1
|
||||
mulps xmm6, xmm2
|
||||
addps xmm4, xmm5
|
||||
addps xmm4, xmm6
|
||||
movaps XMMWORD PTR [rdx], xmm4
|
||||
MatrixMultVec3x3 ENDP
|
||||
|
||||
MatrixMultiply PROC PUBLIC
|
||||
movaps xmm0, XMMWORD PTR [rcx]
|
||||
movaps xmm1, XMMWORD PTR [rcx+16]
|
||||
movaps xmm2, XMMWORD PTR [rcx+32]
|
||||
movaps xmm3, XMMWORD PTR [rcx+48]
|
||||
movaps xmm4, XMMWORD PTR [rdx] ; r00, r01, r02, r03
|
||||
movaps xmm5,xmm4
|
||||
movaps xmm6,xmm4
|
||||
movaps xmm7,xmm4
|
||||
shufps xmm4,xmm4,00000000b
|
||||
shufps xmm5,xmm5,01010101b
|
||||
shufps xmm6,xmm6,10101010b
|
||||
shufps xmm7,xmm7,11111111b
|
||||
mulps xmm4,xmm0
|
||||
mulps xmm5,xmm1
|
||||
mulps xmm6,xmm2
|
||||
mulps xmm7,xmm3
|
||||
addps xmm4,xmm5
|
||||
addps xmm4,xmm6
|
||||
addps xmm4,xmm7
|
||||
movaps XMMWORD PTR [rcx],xmm4
|
||||
movaps xmm4, XMMWORD PTR [rdx+16] ; r04, r05, r06, r07
|
||||
movaps xmm5,xmm4
|
||||
movaps xmm6,xmm4
|
||||
movaps xmm7,xmm4
|
||||
shufps xmm4,xmm4,00000000b
|
||||
shufps xmm5,xmm5,01010101b
|
||||
shufps xmm6,xmm6,10101010b
|
||||
shufps xmm7,xmm7,11111111b
|
||||
mulps xmm4,xmm0
|
||||
mulps xmm5,xmm1
|
||||
mulps xmm6,xmm2
|
||||
mulps xmm7,xmm3
|
||||
addps xmm4,xmm5
|
||||
addps xmm4,xmm6
|
||||
addps xmm4,xmm7
|
||||
movaps XMMWORD PTR [rcx+16],xmm4
|
||||
movaps xmm4, XMMWORD PTR [rdx+32] ; r08, r09, r10, r11
|
||||
movaps xmm5,xmm4
|
||||
movaps xmm6,xmm4
|
||||
movaps xmm7,xmm4
|
||||
shufps xmm4,xmm4,00000000b
|
||||
shufps xmm5,xmm5,01010101b
|
||||
shufps xmm6,xmm6,10101010b
|
||||
shufps xmm7,xmm7,11111111b
|
||||
mulps xmm4,xmm0
|
||||
mulps xmm5,xmm1
|
||||
mulps xmm6,xmm2
|
||||
mulps xmm7,xmm3
|
||||
addps xmm4,xmm5
|
||||
addps xmm4,xmm6
|
||||
addps xmm4,xmm7
|
||||
movaps XMMWORD PTR [rcx+32],xmm4
|
||||
movaps xmm4, XMMWORD PTR [rdx+48] ; r12, r13, r14, r15
|
||||
movaps xmm5,xmm4
|
||||
movaps xmm6,xmm4
|
||||
movaps xmm7,xmm4
|
||||
shufps xmm4,xmm4,00000000b
|
||||
shufps xmm5,xmm5,01010101b
|
||||
shufps xmm6,xmm6,10101010b
|
||||
shufps xmm7,xmm7,11111111b
|
||||
mulps xmm4,xmm0
|
||||
mulps xmm5,xmm1
|
||||
mulps xmm6,xmm2
|
||||
mulps xmm7,xmm3
|
||||
addps xmm4,xmm5
|
||||
addps xmm4,xmm6
|
||||
addps xmm4,xmm7
|
||||
movaps XMMWORD PTR [rcx+48],xmm4
|
||||
ret 0
|
||||
MatrixMultiply ENDP
|
||||
|
||||
MatrixTranslate PROC PUBLIC
|
||||
movaps xmm0, XMMWORD PTR [rcx]
|
||||
movaps xmm1, XMMWORD PTR [rcx+16]
|
||||
movaps xmm2, XMMWORD PTR [rcx+32]
|
||||
movaps xmm3, XMMWORD PTR [rcx+48]
|
||||
movaps xmm4, XMMWORD PTR [rdx]
|
||||
movaps xmm5, xmm4
|
||||
movaps xmm6, xmm4
|
||||
movaps xmm7, xmm4
|
||||
shufps xmm4, xmm4, 00000000b
|
||||
shufps xmm5, xmm5, 01010101b
|
||||
shufps xmm6, xmm6, 10101010b
|
||||
mulps xmm4, xmm0
|
||||
mulps xmm5, xmm1
|
||||
mulps xmm6, xmm2
|
||||
addps xmm4, xmm5
|
||||
addps xmm4, xmm6
|
||||
addps xmm4, xmm3
|
||||
movaps XMMWORD PTR [rcx+48], xmm4
|
||||
ret 0
|
||||
MatrixTranslate ENDP
|
||||
|
||||
MatrixScale PROC PUBLIC
|
||||
movaps xmm0, XMMWORD PTR [rcx]
|
||||
movaps xmm1, XMMWORD PTR [rcx+16]
|
||||
movaps xmm2, XMMWORD PTR [rcx+32]
|
||||
movaps xmm4, XMMWORD PTR [rdx]
|
||||
movaps xmm5, xmm4
|
||||
movaps xmm6, xmm4
|
||||
shufps xmm4, xmm4, 00000000b
|
||||
shufps xmm5, xmm5, 01010101b
|
||||
shufps xmm6, xmm6, 10101010b
|
||||
mulps xmm4, xmm0
|
||||
mulps xmm5, xmm1
|
||||
mulps xmm6, xmm2
|
||||
movaps XMMWORD PTR [rcx],xmm4
|
||||
movaps XMMWORD PTR [rcx+16],xmm5
|
||||
movaps XMMWORD PTR [rcx+32],xmm6
|
||||
ret 0
|
||||
MatrixScale ENDP
|
||||
|
||||
end
|
||||
;
|
||||
; Copyright (C) 2006 yopyop
|
||||
; Copyright (C) 2008 CrazyMax
|
||||
;
|
||||
; This file is part of DeSmuME
|
||||
;
|
||||
; DeSmuME is free software; you can redistribute it and/or modify
|
||||
; it under the terms of the GNU General Public License as published by
|
||||
; the Free Software Foundation; either version 2 of the License, or
|
||||
; (at your option) any later version.
|
||||
;
|
||||
; DeSmuME is distributed in the hope that it will be useful,
|
||||
; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
; GNU General Public License for more details.
|
||||
;
|
||||
; You should have received a copy of the GNU General Public License
|
||||
; along with DeSmuME; if not, write to the Free Software
|
||||
; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
TITLE matrix_sse2-x64.asm
|
||||
.code
|
||||
|
||||
; MatrixMultVec4x4(rcx = matrix, rdx = vector)
;
; Replaces the four floats (x, y, z, w) at [rdx] with
;     x*row0 + y*row1 + z*row2 + w*row3
; where row0..row3 are the four 16-byte rows of the float matrix at [rcx].
; The matrix is only read. Both pointers must be 16-byte aligned.
;
; Only xmm0-xmm3 are used: the arguments arrive in rcx/rdx (Microsoft x64
; convention), under which xmm6-xmm15 are nonvolatile, so the previous use of
; xmm6/xmm7 as scratch corrupted caller state.
MatrixMultVec4x4 PROC PUBLIC
    movaps  xmm0, XMMWORD PTR [rdx]         ; x y z w
    movaps  xmm1, xmm0
    movaps  xmm2, xmm0
    movaps  xmm3, xmm0
    shufps  xmm0, xmm0, 00000000b           ; x x x x
    shufps  xmm1, xmm1, 01010101b           ; y y y y
    shufps  xmm2, xmm2, 10101010b           ; z z z z
    shufps  xmm3, xmm3, 11111111b           ; w w w w
    mulps   xmm0, XMMWORD PTR [rcx]
    mulps   xmm1, XMMWORD PTR [rcx+16]
    mulps   xmm2, XMMWORD PTR [rcx+32]
    mulps   xmm3, XMMWORD PTR [rcx+48]
    addps   xmm0, xmm1
    addps   xmm0, xmm2
    addps   xmm0, xmm3
    movaps  XMMWORD PTR [rdx], xmm0
    ret     0
MatrixMultVec4x4 ENDP
|
||||
|
||||
; MatrixMultVec3x3(rcx = matrix, rdx = vector)
;
; Replaces the four floats at [rdx] with
;     x*row0 + y*row1 + z*row2
; where (x, y, z) are the first three floats at [rdx] and row0..row2 are the
; first three 16-byte rows of the float matrix at [rcx]. Row 3 and the
; vector's fourth component do not contribute. The matrix is only read.
; Both pointers must be 16-byte aligned.
;
; Fixes:
;  * the routine had no `ret`, so execution ran on into the procedure that
;    follows it in the file;
;  * xmm6/xmm7 (nonvolatile under the Microsoft x64 convention implied by the
;    rcx/rdx arguments) were clobbered; only xmm0-xmm2 are used now.
MatrixMultVec3x3 PROC PUBLIC
    movaps  xmm0, XMMWORD PTR [rdx]         ; x y z ?
    movaps  xmm1, xmm0
    movaps  xmm2, xmm0
    shufps  xmm0, xmm0, 00000000b           ; x x x x
    shufps  xmm1, xmm1, 01010101b           ; y y y y
    shufps  xmm2, xmm2, 10101010b           ; z z z z
    mulps   xmm0, XMMWORD PTR [rcx]
    mulps   xmm1, XMMWORD PTR [rcx+16]
    mulps   xmm2, XMMWORD PTR [rcx+32]
    addps   xmm0, xmm1
    addps   xmm0, xmm2
    movaps  XMMWORD PTR [rdx], xmm0
    ret     0
MatrixMultVec3x3 ENDP
|
||||
|
||||
; MatrixMultiply(rcx = matrix, rdx = rightMatrix)
;
; Both arguments point to 4x4 float matrices stored as four 16-byte rows.
; For every row i (0..3) the routine computes
;     r[i][0]*row0 + r[i][1]*row1 + r[i][2]*row2 + r[i][3]*row3
; where r is the matrix at [rdx] and row0..row3 are the ORIGINAL rows of the
; matrix at [rcx], and stores the result as row i of [rcx].
; Both pointers must be 16-byte aligned.
;
; Register use is limited to xmm0-xmm5: the arguments arrive in rcx/rdx
; (Microsoft x64 convention), under which xmm6-xmm15 are nonvolatile, so the
; previous xmm6/xmm7 scratch use corrupted caller state. Each element of a
; right-hand row is broadcast by reloading the row and shuffling it.
MatrixMultiply PROC PUBLIC
    ; Cache the destination rows; they are overwritten one by one below.
    movaps  xmm0, XMMWORD PTR [rcx]
    movaps  xmm1, XMMWORD PTR [rcx+16]
    movaps  xmm2, XMMWORD PTR [rcx+32]
    movaps  xmm3, XMMWORD PTR [rcx+48]

    ; row 0  (r00, r01, r02, r03)
    movaps  xmm4, XMMWORD PTR [rdx]
    shufps  xmm4, xmm4, 00000000b
    mulps   xmm4, xmm0
    movaps  xmm5, XMMWORD PTR [rdx]
    shufps  xmm5, xmm5, 01010101b
    mulps   xmm5, xmm1
    addps   xmm4, xmm5
    movaps  xmm5, XMMWORD PTR [rdx]
    shufps  xmm5, xmm5, 10101010b
    mulps   xmm5, xmm2
    addps   xmm4, xmm5
    movaps  xmm5, XMMWORD PTR [rdx]
    shufps  xmm5, xmm5, 11111111b
    mulps   xmm5, xmm3
    addps   xmm4, xmm5
    movaps  XMMWORD PTR [rcx], xmm4

    ; row 1  (r04, r05, r06, r07)
    movaps  xmm4, XMMWORD PTR [rdx+16]
    shufps  xmm4, xmm4, 00000000b
    mulps   xmm4, xmm0
    movaps  xmm5, XMMWORD PTR [rdx+16]
    shufps  xmm5, xmm5, 01010101b
    mulps   xmm5, xmm1
    addps   xmm4, xmm5
    movaps  xmm5, XMMWORD PTR [rdx+16]
    shufps  xmm5, xmm5, 10101010b
    mulps   xmm5, xmm2
    addps   xmm4, xmm5
    movaps  xmm5, XMMWORD PTR [rdx+16]
    shufps  xmm5, xmm5, 11111111b
    mulps   xmm5, xmm3
    addps   xmm4, xmm5
    movaps  XMMWORD PTR [rcx+16], xmm4

    ; row 2  (r08, r09, r10, r11)
    movaps  xmm4, XMMWORD PTR [rdx+32]
    shufps  xmm4, xmm4, 00000000b
    mulps   xmm4, xmm0
    movaps  xmm5, XMMWORD PTR [rdx+32]
    shufps  xmm5, xmm5, 01010101b
    mulps   xmm5, xmm1
    addps   xmm4, xmm5
    movaps  xmm5, XMMWORD PTR [rdx+32]
    shufps  xmm5, xmm5, 10101010b
    mulps   xmm5, xmm2
    addps   xmm4, xmm5
    movaps  xmm5, XMMWORD PTR [rdx+32]
    shufps  xmm5, xmm5, 11111111b
    mulps   xmm5, xmm3
    addps   xmm4, xmm5
    movaps  XMMWORD PTR [rcx+32], xmm4

    ; row 3  (r12, r13, r14, r15)
    movaps  xmm4, XMMWORD PTR [rdx+48]
    shufps  xmm4, xmm4, 00000000b
    mulps   xmm4, xmm0
    movaps  xmm5, XMMWORD PTR [rdx+48]
    shufps  xmm5, xmm5, 01010101b
    mulps   xmm5, xmm1
    addps   xmm4, xmm5
    movaps  xmm5, XMMWORD PTR [rdx+48]
    shufps  xmm5, xmm5, 10101010b
    mulps   xmm5, xmm2
    addps   xmm4, xmm5
    movaps  xmm5, XMMWORD PTR [rdx+48]
    shufps  xmm5, xmm5, 11111111b
    mulps   xmm5, xmm3
    addps   xmm4, xmm5
    movaps  XMMWORD PTR [rcx+48], xmm4

    ret     0
MatrixMultiply ENDP
|
||||
|
||||
; MatrixTranslate(rcx = matrix, rdx = vector)
;
; Reads the first three floats (x, y, z) at [rdx] and replaces row 3 of the
; 4x4 float matrix at [rcx] with
;     x*row0 + y*row1 + z*row2 + row3
; Rows 0-2 are not written. Both pointers must be 16-byte aligned because
; movaps and SSE memory operands are used.
;
; Only xmm0-xmm2 are touched: the arguments arrive in rcx/rdx (Microsoft x64
; convention), under which xmm6-xmm15 are nonvolatile. The previous version
; clobbered xmm6 and also made an unused copy into xmm7.
MatrixTranslate PROC PUBLIC
    movaps  xmm0, XMMWORD PTR [rdx]         ; x y z ?
    movaps  xmm1, xmm0
    movaps  xmm2, xmm0
    shufps  xmm0, xmm0, 00000000b           ; x x x x
    shufps  xmm1, xmm1, 01010101b           ; y y y y
    shufps  xmm2, xmm2, 10101010b           ; z z z z
    mulps   xmm0, XMMWORD PTR [rcx]         ; x * row0
    mulps   xmm1, XMMWORD PTR [rcx+16]      ; y * row1
    mulps   xmm2, XMMWORD PTR [rcx+32]      ; z * row2
    addps   xmm0, xmm1
    addps   xmm0, xmm2
    addps   xmm0, XMMWORD PTR [rcx+48]      ; + row3
    movaps  XMMWORD PTR [rcx+48], xmm0
    ret     0
MatrixTranslate ENDP
|
||||
|
||||
; MatrixScale(rcx = matrix, rdx = vector)
;
; Reads the first three floats (x, y, z) at [rdx] and scales the first three
; rows of the 4x4 float matrix at [rcx] in place:
;     row0 *= x,  row1 *= y,  row2 *= z
; Row 3 is not touched. Both pointers must be 16-byte aligned.
;
; Only xmm0-xmm2 are used so that the nonvolatile xmm6 (Microsoft x64
; convention, implied by the rcx/rdx arguments) is no longer clobbered.
MatrixScale PROC PUBLIC
    movaps  xmm0, XMMWORD PTR [rdx]         ; x y z ?
    movaps  xmm1, xmm0
    movaps  xmm2, xmm0
    shufps  xmm0, xmm0, 00000000b           ; x x x x
    shufps  xmm1, xmm1, 01010101b           ; y y y y
    shufps  xmm2, xmm2, 10101010b           ; z z z z
    mulps   xmm0, XMMWORD PTR [rcx]
    mulps   xmm1, XMMWORD PTR [rcx+16]
    mulps   xmm2, XMMWORD PTR [rcx+32]
    movaps  XMMWORD PTR [rcx], xmm0
    movaps  XMMWORD PTR [rcx+16], xmm1
    movaps  XMMWORD PTR [rcx+32], xmm2
    ret     0
MatrixScale ENDP
|
||||
|
||||
end
|
||||
|
|
|
@ -1,183 +1,185 @@
|
|||
;
|
||||
; Copyright (C) 2006 yopyop
|
||||
; Copyright (C) 2008 CrazyMax
|
||||
;
|
||||
; This file is part of DeSmuME
|
||||
;
|
||||
; DeSmuME is free software; you can redistribute it and/or modify
|
||||
; it under the terms of the GNU General Public License as published by
|
||||
; the Free Software Foundation; either version 2 of the License, or
|
||||
; (at your option) any later version.
|
||||
;
|
||||
; DeSmuME is distributed in the hope that it will be useful,
|
||||
; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
; GNU General Public License for more details.
|
||||
;
|
||||
; You should have received a copy of the GNU General Public License
|
||||
; along with DeSmuME; if not, write to the Free Software
|
||||
; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
TITLE matrix_sse2-x86.asm
|
||||
.686P
|
||||
.XMM
|
||||
.model flat
|
||||
.code
|
||||
|
||||
; @MatrixMultVec4x4@8 (ecx = matrix, edx = vector)
;
; Replaces the four floats at [edx] with
;     x*row0 + y*row1 + z*row2 + row3
; where (x, y, z) are the first three floats at [edx] and row0..row3 are the
; four 16-byte rows of the float matrix at [ecx].
; NOTE: the vector's fourth component is not used as a factor; row3 is added
; unscaled, i.e. the input is treated as if w = 1.
; The matrix is only read. Both pointers must be 16-byte aligned.
;
; The unused copy of the vector into xmm7 has been dropped.
@MatrixMultVec4x4@8 PROC PUBLIC
    movaps  xmm0, XMMWORD PTR [edx]         ; x y z ?
    movaps  xmm1, xmm0
    movaps  xmm2, xmm0
    shufps  xmm0, xmm0, 00000000b           ; x x x x
    shufps  xmm1, xmm1, 01010101b           ; y y y y
    shufps  xmm2, xmm2, 10101010b           ; z z z z
    mulps   xmm0, XMMWORD PTR [ecx]
    mulps   xmm1, XMMWORD PTR [ecx+16]
    mulps   xmm2, XMMWORD PTR [ecx+32]
    addps   xmm0, xmm1
    addps   xmm0, xmm2
    addps   xmm0, XMMWORD PTR [ecx+48]      ; + row3 (implicit w = 1)
    movaps  XMMWORD PTR [edx], xmm0
    ret     0
@MatrixMultVec4x4@8 ENDP
|
||||
|
||||
; @MatrixMultVec3x3@8 (ecx = matrix, edx = vector)
;
; Replaces the four floats at [edx] with
;     x*row0 + y*row1 + z*row2
; where (x, y, z) are the first three floats at [edx] and row0..row2 are the
; first three 16-byte rows of the float matrix at [ecx]. Row 3 and the
; vector's fourth component do not contribute. The matrix is only read.
; Both pointers must be 16-byte aligned.
;
; The unused copy of the vector into xmm7 has been dropped.
@MatrixMultVec3x3@8 PROC PUBLIC
    movaps  xmm0, XMMWORD PTR [edx]         ; x y z ?
    movaps  xmm1, xmm0
    movaps  xmm2, xmm0
    shufps  xmm0, xmm0, 00000000b           ; x x x x
    shufps  xmm1, xmm1, 01010101b           ; y y y y
    shufps  xmm2, xmm2, 10101010b           ; z z z z
    mulps   xmm0, XMMWORD PTR [ecx]
    mulps   xmm1, XMMWORD PTR [ecx+16]
    mulps   xmm2, XMMWORD PTR [ecx+32]
    addps   xmm0, xmm1
    addps   xmm0, xmm2
    movaps  XMMWORD PTR [edx], xmm0
    ret     0
@MatrixMultVec3x3@8 ENDP
|
||||
|
||||
; @MatrixMultiply@8 (ecx = matrix, edx = rightMatrix)
;
; Both arguments point to 4x4 float matrices stored as four 16-byte rows.
; For every row i (0..3) the routine computes
;     r[i][0]*row0 + r[i][1]*row1 + r[i][2]*row2 + r[i][3]*row3
; where r is the matrix at [edx] and row0..row3 are the ORIGINAL rows of the
; matrix at [ecx], and stores the result as row i of [ecx].
; Both pointers must be 16-byte aligned.
@MatrixMultiply@8 PROC PUBLIC
    ; Cache the destination rows; they are overwritten one by one below.
    movaps  xmm0, XMMWORD PTR [ecx]
    movaps  xmm1, XMMWORD PTR [ecx+16]
    movaps  xmm2, XMMWORD PTR [ecx+32]
    movaps  xmm3, XMMWORD PTR [ecx+48]

    ; row 0  (r00, r01, r02, r03)
    movaps  xmm4, XMMWORD PTR [edx]
    shufps  xmm4, xmm4, 00h
    mulps   xmm4, xmm0
    movaps  xmm5, XMMWORD PTR [edx]
    shufps  xmm5, xmm5, 55h
    mulps   xmm5, xmm1
    addps   xmm4, xmm5
    movaps  xmm5, XMMWORD PTR [edx]
    shufps  xmm5, xmm5, 0AAh
    mulps   xmm5, xmm2
    addps   xmm4, xmm5
    movaps  xmm5, XMMWORD PTR [edx]
    shufps  xmm5, xmm5, 0FFh
    mulps   xmm5, xmm3
    addps   xmm4, xmm5
    movaps  XMMWORD PTR [ecx], xmm4

    ; row 1  (r04, r05, r06, r07)
    movaps  xmm4, XMMWORD PTR [edx+16]
    shufps  xmm4, xmm4, 00h
    mulps   xmm4, xmm0
    movaps  xmm5, XMMWORD PTR [edx+16]
    shufps  xmm5, xmm5, 55h
    mulps   xmm5, xmm1
    addps   xmm4, xmm5
    movaps  xmm5, XMMWORD PTR [edx+16]
    shufps  xmm5, xmm5, 0AAh
    mulps   xmm5, xmm2
    addps   xmm4, xmm5
    movaps  xmm5, XMMWORD PTR [edx+16]
    shufps  xmm5, xmm5, 0FFh
    mulps   xmm5, xmm3
    addps   xmm4, xmm5
    movaps  XMMWORD PTR [ecx+16], xmm4

    ; row 2  (r08, r09, r10, r11)
    movaps  xmm4, XMMWORD PTR [edx+32]
    shufps  xmm4, xmm4, 00h
    mulps   xmm4, xmm0
    movaps  xmm5, XMMWORD PTR [edx+32]
    shufps  xmm5, xmm5, 55h
    mulps   xmm5, xmm1
    addps   xmm4, xmm5
    movaps  xmm5, XMMWORD PTR [edx+32]
    shufps  xmm5, xmm5, 0AAh
    mulps   xmm5, xmm2
    addps   xmm4, xmm5
    movaps  xmm5, XMMWORD PTR [edx+32]
    shufps  xmm5, xmm5, 0FFh
    mulps   xmm5, xmm3
    addps   xmm4, xmm5
    movaps  XMMWORD PTR [ecx+32], xmm4

    ; row 3  (r12, r13, r14, r15)
    movaps  xmm4, XMMWORD PTR [edx+48]
    shufps  xmm4, xmm4, 00h
    mulps   xmm4, xmm0
    movaps  xmm5, XMMWORD PTR [edx+48]
    shufps  xmm5, xmm5, 55h
    mulps   xmm5, xmm1
    addps   xmm4, xmm5
    movaps  xmm5, XMMWORD PTR [edx+48]
    shufps  xmm5, xmm5, 0AAh
    mulps   xmm5, xmm2
    addps   xmm4, xmm5
    movaps  xmm5, XMMWORD PTR [edx+48]
    shufps  xmm5, xmm5, 0FFh
    mulps   xmm5, xmm3
    addps   xmm4, xmm5
    movaps  XMMWORD PTR [ecx+48], xmm4

    ret     0
@MatrixMultiply@8 ENDP
|
||||
|
||||
; @MatrixTranslate@8 (ecx = matrix, edx = vector)
;
; Reads the first three floats (x, y, z) at [edx] and replaces row 3 of the
; 4x4 float matrix at [ecx] with
;     x*row0 + y*row1 + z*row2 + row3
; Rows 0-2 are not written. Both pointers must be 16-byte aligned.
;
; The unused copy of the vector into xmm7 has been dropped.
@MatrixTranslate@8 PROC PUBLIC
    movaps  xmm0, XMMWORD PTR [edx]         ; x y z ?
    movaps  xmm1, xmm0
    movaps  xmm2, xmm0
    shufps  xmm0, xmm0, 00000000b           ; x x x x
    shufps  xmm1, xmm1, 01010101b           ; y y y y
    shufps  xmm2, xmm2, 10101010b           ; z z z z
    mulps   xmm0, XMMWORD PTR [ecx]
    mulps   xmm1, XMMWORD PTR [ecx+16]
    mulps   xmm2, XMMWORD PTR [ecx+32]
    addps   xmm0, xmm1
    addps   xmm0, xmm2
    addps   xmm0, XMMWORD PTR [ecx+48]      ; + row3
    movaps  XMMWORD PTR [ecx+48], xmm0
    ret     0
@MatrixTranslate@8 ENDP
|
||||
|
||||
; @MatrixScale@8 (ecx = matrix, edx = vector)
;
; Reads the first three floats (x, y, z) at [edx] and scales the first three
; rows of the 4x4 float matrix at [ecx] in place:
;     row0 *= x,  row1 *= y,  row2 *= z
; Row 3 is not touched. Both pointers must be 16-byte aligned.
@MatrixScale@8 PROC PUBLIC
    movaps  xmm0, XMMWORD PTR [edx]         ; x y z ?
    movaps  xmm1, xmm0
    movaps  xmm2, xmm0
    shufps  xmm0, xmm0, 00h                 ; x x x x
    shufps  xmm1, xmm1, 55h                 ; y y y y
    shufps  xmm2, xmm2, 0AAh                ; z z z z
    mulps   xmm0, XMMWORD PTR [ecx]
    mulps   xmm1, XMMWORD PTR [ecx+16]
    mulps   xmm2, XMMWORD PTR [ecx+32]
    movaps  XMMWORD PTR [ecx], xmm0
    movaps  XMMWORD PTR [ecx+16], xmm1
    movaps  XMMWORD PTR [ecx+32], xmm2
    ret     0
@MatrixScale@8 ENDP
|
||||
|
||||
end
|
||||
|
||||
;
|
||||
; Copyright (C) 2006 yopyop
|
||||
; Copyright (C) 2008 CrazyMax
|
||||
;
|
||||
; This file is part of DeSmuME
|
||||
;
|
||||
; DeSmuME is free software; you can redistribute it and/or modify
|
||||
; it under the terms of the GNU General Public License as published by
|
||||
; the Free Software Foundation; either version 2 of the License, or
|
||||
; (at your option) any later version.
|
||||
;
|
||||
; DeSmuME is distributed in the hope that it will be useful,
|
||||
; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
; GNU General Public License for more details.
|
||||
;
|
||||
; You should have received a copy of the GNU General Public License
|
||||
; along with DeSmuME; if not, write to the Free Software
|
||||
; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
||||
|
||||
TITLE matrix_sse2-x86.asm
|
||||
.686P
|
||||
.XMM
|
||||
.model flat
|
||||
.code
|
||||
|
||||
; @MatrixMultVec4x4@8 (ecx = matrix, edx = vector)
;
; Replaces the four floats (x, y, z, w) at [edx] with
;     x*row0 + y*row1 + z*row2 + w*row3
; where row0..row3 are the four 16-byte rows of the float matrix at [ecx].
; The matrix is only read. Both pointers must be 16-byte aligned.
@MatrixMultVec4x4@8 PROC PUBLIC
    movaps  xmm0, XMMWORD PTR [edx]         ; x y z w
    movaps  xmm1, xmm0
    movaps  xmm2, xmm0
    movaps  xmm3, xmm0
    shufps  xmm0, xmm0, 00h                 ; x x x x
    shufps  xmm1, xmm1, 55h                 ; y y y y
    shufps  xmm2, xmm2, 0AAh                ; z z z z
    shufps  xmm3, xmm3, 0FFh                ; w w w w
    mulps   xmm0, XMMWORD PTR [ecx]
    mulps   xmm1, XMMWORD PTR [ecx+16]
    mulps   xmm2, XMMWORD PTR [ecx+32]
    mulps   xmm3, XMMWORD PTR [ecx+48]
    addps   xmm0, xmm1
    addps   xmm0, xmm2
    addps   xmm0, xmm3
    movaps  XMMWORD PTR [edx], xmm0
    ret     0
@MatrixMultVec4x4@8 ENDP
|
||||
|
||||
; @MatrixMultVec3x3@8 (ecx = matrix, edx = vector)
;
; Replaces the four floats at [edx] with
;     x*row0 + y*row1 + z*row2
; where (x, y, z) are the first three floats at [edx] and row0..row2 are the
; first three 16-byte rows of the float matrix at [ecx]. Row 3 and the
; vector's fourth component do not contribute. The matrix is only read.
; Both pointers must be 16-byte aligned.
;
; The unused copy of the vector into xmm7 has been dropped.
@MatrixMultVec3x3@8 PROC PUBLIC
    movaps  xmm0, XMMWORD PTR [edx]         ; x y z ?
    movaps  xmm1, xmm0
    movaps  xmm2, xmm0
    shufps  xmm0, xmm0, 00000000b           ; x x x x
    shufps  xmm1, xmm1, 01010101b           ; y y y y
    shufps  xmm2, xmm2, 10101010b           ; z z z z
    mulps   xmm0, XMMWORD PTR [ecx]
    mulps   xmm1, XMMWORD PTR [ecx+16]
    mulps   xmm2, XMMWORD PTR [ecx+32]
    addps   xmm0, xmm1
    addps   xmm0, xmm2
    movaps  XMMWORD PTR [edx], xmm0
    ret     0
@MatrixMultVec3x3@8 ENDP
|
||||
|
||||
; @MatrixMultiply@8 (ecx = matrix, edx = rightMatrix)
;
; Both arguments point to 4x4 float matrices stored as four 16-byte rows.
; For every row i (0..3) the routine computes
;     r[i][0]*row0 + r[i][1]*row1 + r[i][2]*row2 + r[i][3]*row3
; where r is the matrix at [edx] and row0..row3 are the ORIGINAL rows of the
; matrix at [ecx], and stores the result as row i of [ecx].
; Both pointers must be 16-byte aligned.
@MatrixMultiply@8 PROC PUBLIC
    ; Cache the destination rows; they are overwritten one by one below.
    movaps  xmm0, XMMWORD PTR [ecx]
    movaps  xmm1, XMMWORD PTR [ecx+16]
    movaps  xmm2, XMMWORD PTR [ecx+32]
    movaps  xmm3, XMMWORD PTR [ecx+48]

    ; row 0  (r00, r01, r02, r03)
    movaps  xmm4, XMMWORD PTR [edx]
    shufps  xmm4, xmm4, 00h
    mulps   xmm4, xmm0
    movaps  xmm5, XMMWORD PTR [edx]
    shufps  xmm5, xmm5, 55h
    mulps   xmm5, xmm1
    addps   xmm4, xmm5
    movaps  xmm5, XMMWORD PTR [edx]
    shufps  xmm5, xmm5, 0AAh
    mulps   xmm5, xmm2
    addps   xmm4, xmm5
    movaps  xmm5, XMMWORD PTR [edx]
    shufps  xmm5, xmm5, 0FFh
    mulps   xmm5, xmm3
    addps   xmm4, xmm5
    movaps  XMMWORD PTR [ecx], xmm4

    ; row 1  (r04, r05, r06, r07)
    movaps  xmm4, XMMWORD PTR [edx+16]
    shufps  xmm4, xmm4, 00h
    mulps   xmm4, xmm0
    movaps  xmm5, XMMWORD PTR [edx+16]
    shufps  xmm5, xmm5, 55h
    mulps   xmm5, xmm1
    addps   xmm4, xmm5
    movaps  xmm5, XMMWORD PTR [edx+16]
    shufps  xmm5, xmm5, 0AAh
    mulps   xmm5, xmm2
    addps   xmm4, xmm5
    movaps  xmm5, XMMWORD PTR [edx+16]
    shufps  xmm5, xmm5, 0FFh
    mulps   xmm5, xmm3
    addps   xmm4, xmm5
    movaps  XMMWORD PTR [ecx+16], xmm4

    ; row 2  (r08, r09, r10, r11)
    movaps  xmm4, XMMWORD PTR [edx+32]
    shufps  xmm4, xmm4, 00h
    mulps   xmm4, xmm0
    movaps  xmm5, XMMWORD PTR [edx+32]
    shufps  xmm5, xmm5, 55h
    mulps   xmm5, xmm1
    addps   xmm4, xmm5
    movaps  xmm5, XMMWORD PTR [edx+32]
    shufps  xmm5, xmm5, 0AAh
    mulps   xmm5, xmm2
    addps   xmm4, xmm5
    movaps  xmm5, XMMWORD PTR [edx+32]
    shufps  xmm5, xmm5, 0FFh
    mulps   xmm5, xmm3
    addps   xmm4, xmm5
    movaps  XMMWORD PTR [ecx+32], xmm4

    ; row 3  (r12, r13, r14, r15)
    movaps  xmm4, XMMWORD PTR [edx+48]
    shufps  xmm4, xmm4, 00h
    mulps   xmm4, xmm0
    movaps  xmm5, XMMWORD PTR [edx+48]
    shufps  xmm5, xmm5, 55h
    mulps   xmm5, xmm1
    addps   xmm4, xmm5
    movaps  xmm5, XMMWORD PTR [edx+48]
    shufps  xmm5, xmm5, 0AAh
    mulps   xmm5, xmm2
    addps   xmm4, xmm5
    movaps  xmm5, XMMWORD PTR [edx+48]
    shufps  xmm5, xmm5, 0FFh
    mulps   xmm5, xmm3
    addps   xmm4, xmm5
    movaps  XMMWORD PTR [ecx+48], xmm4

    ret     0
@MatrixMultiply@8 ENDP
|
||||
|
||||
; @MatrixTranslate@8 (ecx = matrix, edx = vector)
;
; Reads the first three floats (x, y, z) at [edx] and replaces row 3 of the
; 4x4 float matrix at [ecx] with
;     x*row0 + y*row1 + z*row2 + row3
; Rows 0-2 are not written. Both pointers must be 16-byte aligned.
;
; The unused copy of the vector into xmm7 has been dropped.
@MatrixTranslate@8 PROC PUBLIC
    movaps  xmm0, XMMWORD PTR [edx]         ; x y z ?
    movaps  xmm1, xmm0
    movaps  xmm2, xmm0
    shufps  xmm0, xmm0, 00000000b           ; x x x x
    shufps  xmm1, xmm1, 01010101b           ; y y y y
    shufps  xmm2, xmm2, 10101010b           ; z z z z
    mulps   xmm0, XMMWORD PTR [ecx]
    mulps   xmm1, XMMWORD PTR [ecx+16]
    mulps   xmm2, XMMWORD PTR [ecx+32]
    addps   xmm0, xmm1
    addps   xmm0, xmm2
    addps   xmm0, XMMWORD PTR [ecx+48]      ; + row3
    movaps  XMMWORD PTR [ecx+48], xmm0
    ret     0
@MatrixTranslate@8 ENDP
|
||||
|
||||
; @MatrixScale@8 (ecx = matrix, edx = vector)
;
; Reads the first three floats (x, y, z) at [edx] and scales the first three
; rows of the 4x4 float matrix at [ecx] in place:
;     row0 *= x,  row1 *= y,  row2 *= z
; Row 3 is not touched. Both pointers must be 16-byte aligned.
@MatrixScale@8 PROC PUBLIC
    movaps  xmm0, XMMWORD PTR [edx]         ; x y z ?
    movaps  xmm1, xmm0
    movaps  xmm2, xmm0
    shufps  xmm0, xmm0, 00h                 ; x x x x
    shufps  xmm1, xmm1, 55h                 ; y y y y
    shufps  xmm2, xmm2, 0AAh                ; z z z z
    mulps   xmm0, XMMWORD PTR [ecx]
    mulps   xmm1, XMMWORD PTR [ecx+16]
    mulps   xmm2, XMMWORD PTR [ecx+32]
    movaps  XMMWORD PTR [ecx], xmm0
    movaps  XMMWORD PTR [ecx+16], xmm1
    movaps  XMMWORD PTR [ecx+32], xmm2
    ret     0
@MatrixScale@8 ENDP
|
||||
|
||||
end
|
||||
|
||||
|
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue