- Add many of NHerve's improvements into OGLRender because I was trying to fix all the 3d issues

- Track polycount better. Still worthless: at the very least, it doesn't account for clipping and culling
- carry w=1 from vertex() through pipeline (this will be necessary for software 3d rendering)
- Make GPU matrix mult and load commands clear out unused rows and cols to identity correctly
- Make matrix 4x4 multiply routines use W-coordinate.
This commit is contained in:
zeromus 2008-09-06 04:08:35 +00:00
parent 7d2fc8964e
commit 5278185e73
6 changed files with 2842 additions and 2749 deletions

View File

@ -27,6 +27,12 @@
- Some fixes in 3D core OGL (fixed textures) [CrazyMax]
- Added texture caching (speedup 3D core) [CrazyMax]
- Fixes clear depth (e.g. Castlevania no longer flips) [NHerve]
- Make matrix 4x4 multiply routines use W-coordinate. [zeromus]
- Make GPU matrix mult and load commands clear out unused rows and cols to identity correctly;
carry w=1 from vertex() through pipeline (this will be necessary for software 3d rendering) [zeromus]
- Track polycount better. Still worthless: at the very least, it doesn't account for clipping and culling [zeromus]
- Fix errors in matrix operations regarding projection mode and pos-vector mode [zeromus]
- Fix error in command unpacking which caused some display lists to totally blow up [zeromus]
0.7.3 -> 0.8
Cocoa:

View File

@ -1,228 +1,255 @@
/*
Copyright (C) 2006-2007 shash
This file is part of DeSmuME
DeSmuME is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
DeSmuME is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with DeSmuME; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "matrix.h"
/* Overwrite the 16 floats at `matrix` with the 4x4 identity. */
void MatrixInit (float *matrix)
{
	int i;

	/* Flat indices 0, 5, 10 and 15 form the diagonal; everything else is zero. */
	for (i = 0; i < 16; i++)
		matrix[i] = (i % 5 == 0) ? 1.f : 0.f;
}
#ifdef SSE2
/* SSE2 build: reset the 16 floats at `matrix` to the 4x4 identity.
   TODO (carried over from the original): still the plain C implementation. */
void __fastcall MatrixIdentity (float *matrix)
{
	int diag;

	memset (matrix, 0, 16 * sizeof (float));
	/* Diagonal elements sit 5 floats apart in the flat array. */
	for (diag = 0; diag < 16; diag += 5)
		matrix[diag] = 1.f;
}
/* SSE2 build: compute the single element `index` (0..15) of the product that
   MatrixMultiply would store, without modifying either input. */
float __fastcall MatrixGetMultipliedIndex (int index, float *matrix, float *rightMatrix)
{
	const int lane = index % 4;            /* position inside a group of four   */
	const int base = (index >> 2) << 2;    /* first element of that group       */
	float acc = matrix[lane] * rightMatrix[base];
	int k;

	/* Accumulate the remaining three products left to right. */
	for (k = 1; k < 4; k++)
		acc += matrix[lane + 4*k] * rightMatrix[base + k];
	return acc;
}
/* SSE2 build: store `value` at element (x, y), i.e. flat index x + 4*y.
   TODO (carried over from the original): still the plain C implementation. */
void __fastcall MatrixSet (float *matrix, int x, int y, float value)
{
	const int flat = (y << 2) + x;

	matrix[flat] = value;
}
/* SSE2 build: copy the 16 floats at `matrixSRC` over `matrixDST`.
   The two buffers must not overlap (memcpy). */
void __fastcall MatrixCopy (float *matrixDST, float *matrixSRC)
{
	enum { MATRIX_FLOATS = 16 };

	memcpy (matrixDST, matrixSRC, MATRIX_FLOATS * sizeof (float));
}
#else
/* Transform the vector at `vecPtr` in place by the 4x4 `matrix`.
   This variant never reads or writes vecPtr[3]: the elements matrix[12..14]
   are added unscaled, i.e. the fourth component is treated as 1. */
void MatrixMultVec4x4 (float *matrix, float *vecPtr)
{
	/* Snapshot the inputs so the in-place stores do not feed later lanes. */
	const float in[3] = { vecPtr[0], vecPtr[1], vecPtr[2] };
	int lane;

	for (lane = 0; lane < 3; lane++)
		vecPtr[lane] = in[0] * matrix[lane] + in[1] * matrix[4 + lane] + in[2] * matrix[8 + lane] + matrix[12 + lane];
}
/* Transform the first three components of `vecPtr` in place using only
   matrix elements 0..2, 4..6 and 8..10; no translation term is applied
   and vecPtr[3] is not touched. */
void MatrixMultVec3x3 (float *matrix, float *vecPtr)
{
	const float in[3] = { vecPtr[0], vecPtr[1], vecPtr[2] };
	int lane;

	for (lane = 0; lane < 3; lane++)
		vecPtr[lane] = in[0] * matrix[lane] + in[1] * matrix[4 + lane] + in[2] * matrix[8 + lane];
}
/* Reset the 16 floats at `matrix` to the 4x4 identity. */
void MatrixIdentity (float *matrix)
{
	int diag;

	memset (matrix, 0, 16 * sizeof (float));
	/* Diagonal elements sit 5 floats apart in the flat array. */
	for (diag = 0; diag < 16; diag += 5)
		matrix[diag] = 1.f;
}
/* Multiply `matrix` by `rightMatrix` and store the result back into `matrix`.
   Output element (group*4 + lane) is the sum over k of
   matrix[lane + 4*k] * rightMatrix[group*4 + k]; `rightMatrix` is only read. */
void MatrixMultiply (float *matrix, float *rightMatrix)
{
	float product[16];
	int group, lane, k;

	for (group = 0; group < 4; group++)
	{
		const float *rhs = &rightMatrix[group*4];

		for (lane = 0; lane < 4; lane++)
		{
			/* Sum the four products left to right. */
			float acc = matrix[lane] * rhs[0];
			for (k = 1; k < 4; k++)
				acc += matrix[lane + 4*k] * rhs[k];
			product[group*4 + lane] = acc;
		}
	}

	/* Results go through a scratch buffer because every output needs the
	   original contents of `matrix`. */
	memcpy (matrix, product, sizeof product);
}
/* Compute the single element `index` (0..15) of the product that
   MatrixMultiply would store, without modifying either input. */
float MatrixGetMultipliedIndex (int index, float *matrix, float *rightMatrix)
{
	const int lane = index % 4;            /* position inside a group of four   */
	const int base = (index >> 2) << 2;    /* first element of that group       */
	float acc = matrix[lane] * rightMatrix[base];
	int k;

	/* Accumulate the remaining three products left to right. */
	for (k = 1; k < 4; k++)
		acc += matrix[lane + 4*k] * rightMatrix[base + k];
	return acc;
}
/* Store `value` at element (x, y), i.e. flat index x + 4*y. */
void MatrixSet (float *matrix, int x, int y, float value)
{
	const int flat = (y << 2) + x;

	matrix[flat] = value;
}
/* Copy the 16 floats at `matrixSRC` over `matrixDST`.
   The two buffers must not overlap (memcpy). */
void MatrixCopy (float *matrixDST, float *matrixSRC)
{
	enum { MATRIX_FLOATS = 16 };

	memcpy (matrixDST, matrixSRC, MATRIX_FLOATS * sizeof (float));
}
/* Apply a translation in place: each of matrix[12..15] is increased by the
   first three groups of `matrix` weighted by ptr[0], ptr[1] and ptr[2].
   Elements 0..11 are left unchanged. */
void MatrixTranslate (float *matrix, float *ptr)
{
	int lane;

	for (lane = 0; lane < 4; lane++)
	{
		matrix[12 + lane] += (matrix[lane]*ptr[0]) + (matrix[4 + lane]*ptr[1]) + (matrix[8 + lane]*ptr[2]);
	}
}
/* Scale in place: elements 0..3 are multiplied by ptr[0], 4..7 by ptr[1]
   and 8..11 by ptr[2]. Elements 12..15 are left unchanged. */
void MatrixScale (float *matrix, float *ptr)
{
	int axis, lane;

	for (axis = 0; axis < 3; axis++)
		for (lane = 0; lane < 4; lane++)
			matrix[axis*4 + lane] *= ptr[axis];
}
#endif
//-----------------------------------------
/* Put `stack` into its empty state: no storage, position 0, size 0.
   Nothing is freed here, so call this only on a stack that owns no buffer. */
void MatrixStackInit (MatrixStack *stack)
{
	stack->size     = 0;
	stack->position = 0;
	stack->matrix   = NULL;
}
/* (Re)allocate storage for `size` matrices and reset every slot to identity.
 *
 * Any previously owned buffer is released first. On success stack->size is
 * set to size - 1, i.e. the highest valid slot index, which is the upper
 * bound MatrixStackSetStackPosition clamps against.
 *
 * If `size` is not positive or the allocation fails, the stack is left with
 * no storage (matrix == NULL, size == 0) instead of dereferencing a failed
 * allocation. stack->position is not modified.
 */
void MatrixStackSetMaxSize (MatrixStack *stack, int size)
{
	int i;

	/* free(NULL) is a no-op, so the first-allocation case needs no branch. */
	free (stack->matrix);
	stack->matrix = NULL;
	stack->size = 0;

	if (size <= 0)
		return;

	/* Widen before multiplying so the byte count is computed in size_t. */
	stack->matrix = (float*) malloc ((size_t)size * 16 * sizeof(float));
	if (stack->matrix == NULL)
		return;

	for (i = 0; i < size; i++)
	{
		MatrixInit (&stack->matrix[i*16]);
	}

	stack->size = size - 1;
}
/* Move the stack position by `pos` (may be negative) and clamp the result
   to the range [0, stack->size]. */
void MatrixStackSetStackPosition (MatrixStack *stack, int pos)
{
	int target = stack->position + pos;

	if (target < 0)
		target = 0;
	else if (target > stack->size)
		target = stack->size;

	stack->position = target;
}
/* Copy the matrix at `ptr` into the current slot, then advance the position
   by one (clamped by MatrixStackSetStackPosition). */
void MatrixStackPushMatrix (MatrixStack *stack, float *ptr)
{
	float *slot = stack->matrix + stack->position*16;

	MatrixCopy (slot, ptr);
	MatrixStackSetStackPosition (stack, 1);
}
/* Move the position back by `size` slots (clamped) and return a pointer to
   the matrix stored at the new position. The pointer refers to the stack's
   own storage. */
float * MatrixStackPopMatrix (MatrixStack *stack, int size)
{
	MatrixStackSetStackPosition (stack, -size);
	return stack->matrix + stack->position*16;
}
/* Return a pointer to slot `pos` of the stack's storage.
   `pos` is not range-checked here. */
float * MatrixStackGetPos (MatrixStack *stack, int pos)
{
	return stack->matrix + pos*16;
}
/* Return a pointer to the slot at the current stack position. */
float * MatrixStackGet (MatrixStack *stack)
{
	return stack->matrix + stack->position*16;
}
/* Overwrite slot `pos` with the matrix at `ptr`; the stack position is not
   changed and `pos` is not range-checked here. */
void MatrixStackLoadMatrix (MatrixStack *stack, int pos, float *ptr)
{
	float *slot = stack->matrix + pos*16;

	MatrixCopy (slot, ptr);
}
/*
Copyright (C) 2006-2007 shash
This file is part of DeSmuME
DeSmuME is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
DeSmuME is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with DeSmuME; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "matrix.h"
/* Overwrite the 16 floats at `matrix` with the 4x4 identity. */
void MatrixInit (float *matrix)
{
	int i;

	/* Flat indices 0, 5, 10 and 15 form the diagonal; everything else is zero. */
	for (i = 0; i < 16; i++)
		matrix[i] = (i % 5 == 0) ? 1.f : 0.f;
}
#ifdef SSE2
/* SSE2 build: reset the 16 floats at `matrix` to the 4x4 identity.
   TODO (carried over from the original): still the plain C implementation. */
void __fastcall MatrixIdentity (float *matrix)
{
	int diag;

	memset (matrix, 0, 16 * sizeof (float));
	/* Diagonal elements sit 5 floats apart in the flat array. */
	for (diag = 0; diag < 16; diag += 5)
		matrix[diag] = 1.f;
}
/* SSE2 build: compute the single element `index` (0..15) of the product that
   MatrixMultiply would store, without modifying either input. */
float __fastcall MatrixGetMultipliedIndex (int index, float *matrix, float *rightMatrix)
{
	const int lane = index % 4;            /* position inside a group of four   */
	const int base = (index >> 2) << 2;    /* first element of that group       */
	float acc = matrix[lane] * rightMatrix[base];
	int k;

	/* Accumulate the remaining three products left to right. */
	for (k = 1; k < 4; k++)
		acc += matrix[lane + 4*k] * rightMatrix[base + k];
	return acc;
}
/* SSE2 build: store `value` at element (x, y), i.e. flat index x + 4*y.
   TODO (carried over from the original): still the plain C implementation. */
void __fastcall MatrixSet (float *matrix, int x, int y, float value)
{
	const int flat = (y << 2) + x;

	matrix[flat] = value;
}
/* SSE2 build: copy the 16 floats at `matrixSRC` over `matrixDST`.
   The two buffers must not overlap (memcpy). */
void __fastcall MatrixCopy (float *matrixDST, float *matrixSRC)
{
	enum { MATRIX_FLOATS = 16 };

	memcpy (matrixDST, matrixSRC, MATRIX_FLOATS * sizeof (float));
}
#else
/* Transform the four-component vector at `vecPtr` in place by the 4x4
   `matrix`. All four components, including vecPtr[3] (w), are read and
   written, so the caller must supply a meaningful w. */
void MatrixMultVec4x4 (float *matrix, float *vecPtr)
{
	/* Snapshot the inputs so the in-place stores do not feed later lanes. */
	const float in[4] = { vecPtr[0], vecPtr[1], vecPtr[2], vecPtr[3] };
	int lane;

	for (lane = 0; lane < 4; lane++)
		vecPtr[lane] = in[0] * matrix[lane] + in[1] * matrix[4 + lane] + in[2] * matrix[8 + lane] + in[3] * matrix[12 + lane];
}
/* Transform the first three components of `vecPtr` in place using only
   matrix elements 0..2, 4..6 and 8..10; no translation term is applied
   and vecPtr[3] is not touched. */
void MatrixMultVec3x3 (float *matrix, float *vecPtr)
{
	const float in[3] = { vecPtr[0], vecPtr[1], vecPtr[2] };
	int lane;

	for (lane = 0; lane < 3; lane++)
		vecPtr[lane] = in[0] * matrix[lane] + in[1] * matrix[4 + lane] + in[2] * matrix[8 + lane];
}
/* Reset the 16 floats at `matrix` to the 4x4 identity. */
void MatrixIdentity (float *matrix)
{
	int diag;

	memset (matrix, 0, 16 * sizeof (float));
	/* Diagonal elements sit 5 floats apart in the flat array. */
	for (diag = 0; diag < 16; diag += 5)
		matrix[diag] = 1.f;
}
/* Multiply `matrix` by `rightMatrix` and store the result back into `matrix`.
   Output element (group*4 + lane) is the sum over k of
   matrix[lane + 4*k] * rightMatrix[group*4 + k]; `rightMatrix` is only read. */
void MatrixMultiply (float *matrix, float *rightMatrix)
{
	float product[16];
	int group, lane, k;

	for (group = 0; group < 4; group++)
	{
		const float *rhs = &rightMatrix[group*4];

		for (lane = 0; lane < 4; lane++)
		{
			/* Sum the four products left to right. */
			float acc = matrix[lane] * rhs[0];
			for (k = 1; k < 4; k++)
				acc += matrix[lane + 4*k] * rhs[k];
			product[group*4 + lane] = acc;
		}
	}

	/* Results go through a scratch buffer because every output needs the
	   original contents of `matrix`. */
	memcpy (matrix, product, sizeof product);
}
/* Compute the single element `index` (0..15) of the product that
   MatrixMultiply would store, without modifying either input. */
float MatrixGetMultipliedIndex (int index, float *matrix, float *rightMatrix)
{
	const int lane = index % 4;            /* position inside a group of four   */
	const int base = (index >> 2) << 2;    /* first element of that group       */
	float acc = matrix[lane] * rightMatrix[base];
	int k;

	/* Accumulate the remaining three products left to right. */
	for (k = 1; k < 4; k++)
		acc += matrix[lane + 4*k] * rightMatrix[base + k];
	return acc;
}
/* Store `value` at element (x, y), i.e. flat index x + 4*y. */
void MatrixSet (float *matrix, int x, int y, float value)
{
	const int flat = (y << 2) + x;

	matrix[flat] = value;
}
/* Transpose the 4x4 matrix at `matrix` in place by exchanging each element
   above the diagonal with its mirror below it:
       0 1 2 3          0 4 8 C
       4 5 6 7   --->   1 5 9 D
       8 9 A B          2 6 A E
       C D E F          3 7 B F
   NOTE(review): this definition sits inside the non-SSE2 branch of the
   surrounding #ifdef - confirm SSE2 builds get a definition from elsewhere. */
void MatrixTranspose(float *matrix)
{
	int r, c;

	for (r = 0; r < 4; r++)
	{
		for (c = r + 1; c < 4; c++)
		{
			const float held = matrix[r*4 + c];

			matrix[r*4 + c] = matrix[c*4 + r];
			matrix[c*4 + r] = held;
		}
	}
}
/* Copy the 16 floats at `matrixSRC` over `matrixDST`.
   The two buffers must not overlap (memcpy). */
void MatrixCopy (float *matrixDST, float *matrixSRC)
{
	enum { MATRIX_FLOATS = 16 };

	memcpy (matrixDST, matrixSRC, MATRIX_FLOATS * sizeof (float));
}
/* Apply a translation in place: each of matrix[12..15] is increased by the
   first three groups of `matrix` weighted by ptr[0], ptr[1] and ptr[2].
   Elements 0..11 are left unchanged. */
void MatrixTranslate (float *matrix, float *ptr)
{
	int lane;

	for (lane = 0; lane < 4; lane++)
	{
		matrix[12 + lane] += (matrix[lane]*ptr[0]) + (matrix[4 + lane]*ptr[1]) + (matrix[8 + lane]*ptr[2]);
	}
}
/* Scale in place: elements 0..3 are multiplied by ptr[0], 4..7 by ptr[1]
   and 8..11 by ptr[2]. Elements 12..15 are left unchanged. */
void MatrixScale (float *matrix, float *ptr)
{
	int axis, lane;

	for (axis = 0; axis < 3; axis++)
		for (lane = 0; lane < 4; lane++)
			matrix[axis*4 + lane] *= ptr[axis];
}
#endif
//-----------------------------------------
/* Put `stack` into its empty state: no storage, position 0, size 0.
   Nothing is freed here, so call this only on a stack that owns no buffer. */
void MatrixStackInit (MatrixStack *stack)
{
	stack->size     = 0;
	stack->position = 0;
	stack->matrix   = NULL;
}
/* (Re)allocate storage for `size` matrices and reset every slot to identity.
 *
 * Any previously owned buffer is released first. On success stack->size is
 * set to size - 1, i.e. the highest valid slot index, which is the upper
 * bound MatrixStackSetStackPosition clamps against.
 *
 * If `size` is not positive or the allocation fails, the stack is left with
 * no storage (matrix == NULL, size == 0) instead of dereferencing a failed
 * allocation. stack->position is not modified.
 */
void MatrixStackSetMaxSize (MatrixStack *stack, int size)
{
	int i;

	/* free(NULL) is a no-op, so the first-allocation case needs no branch. */
	free (stack->matrix);
	stack->matrix = NULL;
	stack->size = 0;

	if (size <= 0)
		return;

	/* Widen before multiplying so the byte count is computed in size_t. */
	stack->matrix = (float*) malloc ((size_t)size * 16 * sizeof(float));
	if (stack->matrix == NULL)
		return;

	for (i = 0; i < size; i++)
	{
		MatrixInit (&stack->matrix[i*16]);
	}

	stack->size = size - 1;
}
/* Move the stack position by `pos` (may be negative) and clamp the result
   to the range [0, stack->size]. */
void MatrixStackSetStackPosition (MatrixStack *stack, int pos)
{
	int target = stack->position + pos;

	if (target < 0)
		target = 0;
	else if (target > stack->size)
		target = stack->size;

	stack->position = target;
}
/* Copy the matrix at `ptr` into the current slot, then advance the position
   by one (clamped by MatrixStackSetStackPosition). */
void MatrixStackPushMatrix (MatrixStack *stack, float *ptr)
{
	float *slot = stack->matrix + stack->position*16;

	MatrixCopy (slot, ptr);
	MatrixStackSetStackPosition (stack, 1);
}
/* Move the position back by `size` slots (clamped) and return a pointer to
   the matrix stored at the new position. The pointer refers to the stack's
   own storage. */
float * MatrixStackPopMatrix (MatrixStack *stack, int size)
{
	MatrixStackSetStackPosition (stack, -size);
	return stack->matrix + stack->position*16;
}
/* Return a pointer to slot `pos` of the stack's storage.
   `pos` is not range-checked here. */
float * MatrixStackGetPos (MatrixStack *stack, int pos)
{
	return stack->matrix + pos*16;
}
/* Return a pointer to the slot at the current stack position. */
float * MatrixStackGet (MatrixStack *stack)
{
	return stack->matrix + stack->position*16;
}
/* Overwrite slot `pos` with the matrix at `ptr`; the stack position is not
   changed and `pos` is not range-checked here. */
void MatrixStackLoadMatrix (MatrixStack *stack, int pos, float *ptr)
{
	float *slot = stack->matrix + pos*16;

	MatrixCopy (slot, ptr);
}

View File

@ -1,79 +1,81 @@
/*
Copyright (C) 2006-2007 shash
This file is part of DeSmuME
DeSmuME is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
DeSmuME is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with DeSmuME; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef MATRIX_H
#define MATRIX_H
#include "types.h"
#ifdef SSE2
#include <xmmintrin.h>
#include <emmintrin.h>
//typedef __declspec(align(16)) float gMatrix[4][4];
//typedef float gMatrix[4][4];
typedef float gMatrix[16];
#endif
/* A stack of 4x4 float matrices kept in one flat array.
   The storage has the same type in SSE2 and non-SSE2 builds. */
typedef struct MatrixStack
{
	float	*matrix;	/* flat storage, 16 floats per slot */
	int	position;	/* index of the current slot */
	int	size;		/* upper bound used when clamping position */
} MatrixStack;
/* Reset a 16-float matrix to identity; declared for both build flavours. */
void MatrixInit (float *matrix);
#ifdef SSE2
/* SSE2 build: every routine uses the __fastcall convention.
   NOTE(review): `const gMatrix` parameters decay to `const float *`, yet
   vecPtr, matrixDST and the first argument of Multiply/Scale look like
   outputs - confirm the const qualifiers are intended. */
extern void __fastcall MatrixMultVec3x3 (const gMatrix matrix, const gMatrix vecPtr);
extern void __fastcall MatrixMultVec4x4 (const gMatrix matrix, const gMatrix vecPtr);
void __fastcall MatrixIdentity (float *matrix);
extern void __fastcall MatrixMultiply (const gMatrix matrix, const gMatrix rightMatrix);
float __fastcall MatrixGetMultipliedIndex (int index, float *matrix, float *rightMatrix);
void __fastcall MatrixSet (float *matrix, int x, int y, float value);
void __fastcall MatrixCopy (const gMatrix matrixDST, const gMatrix matrixSRC);
extern void __fastcall MatrixTranslate (float *matrix, float *ptr);
/* NOTE(review): MatrixScale is declared twice below (with and without extern). */
extern void __fastcall MatrixScale (const gMatrix matrix, const gMatrix ptr);
void __fastcall MatrixScale (const gMatrix matrix, const gMatrix ptr);
#else
/* Portable build: plain C prototypes operating on 16-float arrays. */
void MatrixMultVec3x3 (float *matrix, float *vecPtr);
void MatrixMultVec4x4 (float *matrix, float *vecPtr);
void MatrixIdentity (float *matrix);
void MatrixMultiply (float *matrix, float *rightMatrix);
float MatrixGetMultipliedIndex(int index, float *matrix, float *rightMatrix);
void MatrixSet (float *matrix, int x, int y, float value);
void MatrixCopy (float *matrixDST, float *matrixSRC);
void MatrixTranslate (float *matrix, float *ptr);
void MatrixScale (float *matrix, float *ptr);
#endif
/* Matrix stack API; every function takes the stack it operates on. */
void MatrixStackInit (MatrixStack *stack);
void MatrixStackSetMaxSize (MatrixStack *stack, int size);
/* `pos` is a relative offset added to the current position. */
void MatrixStackSetStackPosition (MatrixStack *stack, int pos);
void MatrixStackPushMatrix (MatrixStack *stack, float *ptr);
/* `size` is the number of slots to step back. */
float* MatrixStackPopMatrix (MatrixStack *stack, int size);
/* `pos` is an absolute slot index for the two accessors that take it. */
float* MatrixStackGetPos (MatrixStack *stack, int pos);
float* MatrixStackGet (MatrixStack *stack);
void MatrixStackLoadMatrix (MatrixStack *stack, int pos, float *ptr);
#endif
/*
Copyright (C) 2006-2007 shash
This file is part of DeSmuME
DeSmuME is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
DeSmuME is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with DeSmuME; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef MATRIX_H
#define MATRIX_H
#include "types.h"
#ifdef SSE2
#include <xmmintrin.h>
#include <emmintrin.h>
//typedef __declspec(align(16)) float gMatrix[4][4];
//typedef float gMatrix[4][4];
typedef float gMatrix[16];
#endif
/* A stack of 4x4 float matrices kept in one flat array.
   The storage has the same type in SSE2 and non-SSE2 builds. */
typedef struct MatrixStack
{
	float	*matrix;	/* flat storage, 16 floats per slot */
	int	position;	/* index of the current slot */
	int	size;		/* upper bound used when clamping position */
} MatrixStack;
/* Reset a 16-float matrix to identity; declared for both build flavours. */
void MatrixInit (float *matrix);
#ifdef SSE2
/* SSE2 build: every routine uses the __fastcall convention.
   NOTE(review): `const gMatrix` parameters decay to `const float *`, yet
   vecPtr, matrixDST and the first argument of Multiply/Scale look like
   outputs - confirm the const qualifiers are intended. */
extern void __fastcall MatrixMultVec3x3 (const gMatrix matrix, const gMatrix vecPtr);
extern void __fastcall MatrixMultVec4x4 (const gMatrix matrix, const gMatrix vecPtr);
void __fastcall MatrixIdentity (float *matrix);
extern void __fastcall MatrixMultiply (const gMatrix matrix, const gMatrix rightMatrix);
float __fastcall MatrixGetMultipliedIndex (int index, float *matrix, float *rightMatrix);
void __fastcall MatrixSet (float *matrix, int x, int y, float value);
void __fastcall MatrixCopy (const gMatrix matrixDST, const gMatrix matrixSRC);
extern void __fastcall MatrixTranslate (float *matrix, float *ptr);
/* NOTE(review): MatrixScale is declared twice below (with and without extern). */
extern void __fastcall MatrixScale (const gMatrix matrix, const gMatrix ptr);
void __fastcall MatrixScale (const gMatrix matrix, const gMatrix ptr);
#else
/* Portable build: plain C prototypes operating on 16-float arrays. */
void MatrixMultVec3x3 (float *matrix, float *vecPtr);
void MatrixMultVec4x4 (float *matrix, float *vecPtr);
void MatrixIdentity (float *matrix);
void MatrixMultiply (float *matrix, float *rightMatrix);
float MatrixGetMultipliedIndex(int index, float *matrix, float *rightMatrix);
void MatrixSet (float *matrix, int x, int y, float value);
void MatrixCopy (float *matrixDST, float *matrixSRC);
void MatrixTranslate (float *matrix, float *ptr);
void MatrixScale (float *matrix, float *ptr);
#endif
/* In-place transpose of a 16-float matrix.
   NOTE(review): declared for every build flavour and without __fastcall -
   confirm a definition is also compiled when SSE2 is defined. */
void MatrixTranspose(float *matrix);
/* Matrix stack API; every function takes the stack it operates on. */
void MatrixStackInit (MatrixStack *stack);
void MatrixStackSetMaxSize (MatrixStack *stack, int size);
/* `pos` is a relative offset added to the current position. */
void MatrixStackSetStackPosition (MatrixStack *stack, int pos);
void MatrixStackPushMatrix (MatrixStack *stack, float *ptr);
/* `size` is the number of slots to step back. */
float* MatrixStackPopMatrix (MatrixStack *stack, int size);
/* `pos` is an absolute slot index for the two accessors that take it. */
float* MatrixStackGetPos (MatrixStack *stack, int pos);
float* MatrixStackGet (MatrixStack *stack);
void MatrixStackLoadMatrix (MatrixStack *stack, int pos, float *ptr);
#endif

View File

@ -1,178 +1,180 @@
;
; Copyright (C) 2006 yopyop
; Copyright (C) 2008 CrazyMax
;
; This file is part of DeSmuME
;
; DeSmuME is free software; you can redistribute it and/or modify
; it under the terms of the GNU General Public License as published by
; the Free Software Foundation; either version 2 of the License, or
; (at your option) any later version.
;
; DeSmuME is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
; GNU General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with DeSmuME; if not, write to the Free Software
; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
TITLE matrix_sse2-x64.asm
.code
; MatrixMultVec4x4: vec = x*row0 + y*row1 + z*row2 + row3, stored over vec.
;   rcx -> matrix (four 16-byte-aligned rows of 4 floats)
;   rdx -> vector (16-byte aligned, 4 floats); all four lanes are rewritten
; The fourth input component is not used: row3 is added unscaled (w taken as 1).
; Only xmm0-xmm2 are touched; XMM6/XMM7 are callee-saved in the Microsoft
; x64 convention and must not be clobbered by a leaf routine.
MatrixMultVec4x4 PROC PUBLIC
	movaps	xmm0, XMMWORD PTR [rdx]
	movaps	xmm1, xmm0
	movaps	xmm2, xmm0
	shufps	xmm0, xmm0, 00000000b		; x x x x
	shufps	xmm1, xmm1, 01010101b		; y y y y
	shufps	xmm2, xmm2, 10101010b		; z z z z
	mulps	xmm0, XMMWORD PTR [rcx]
	mulps	xmm1, XMMWORD PTR [rcx+16]
	mulps	xmm2, XMMWORD PTR [rcx+32]
	addps	xmm0, xmm1
	addps	xmm0, xmm2
	addps	xmm0, XMMWORD PTR [rcx+48]	; + row3 (implicit w = 1)
	movaps	XMMWORD PTR [rdx], xmm0
	ret	0
MatrixMultVec4x4 ENDP
; MatrixMultVec3x3: vec = x*row0 + y*row1 + z*row2, stored over vec.
;   rcx -> matrix (16-byte-aligned rows; only the first three are read)
;   rdx -> vector (16-byte aligned, 4 floats); all four lanes are rewritten
; Fixes: the routine previously had no RET and fell through into the next
; procedure, and it clobbered the callee-saved XMM6/XMM7 registers.
MatrixMultVec3x3 PROC PUBLIC
	movaps	xmm0, XMMWORD PTR [rdx]
	movaps	xmm1, xmm0
	movaps	xmm2, xmm0
	shufps	xmm0, xmm0, 00000000b		; x x x x
	shufps	xmm1, xmm1, 01010101b		; y y y y
	shufps	xmm2, xmm2, 10101010b		; z z z z
	mulps	xmm0, XMMWORD PTR [rcx]
	mulps	xmm1, XMMWORD PTR [rcx+16]
	mulps	xmm2, XMMWORD PTR [rcx+32]
	addps	xmm0, xmm1
	addps	xmm0, xmm2
	movaps	XMMWORD PTR [rdx], xmm0
	ret	0
MatrixMultVec3x3 ENDP
; MatrixMultiply: matrix = matrix * rightMatrix, result stored over matrix.
;   rcx -> matrix      (four 16-byte-aligned rows, read then overwritten)
;   rdx -> rightMatrix (four 16-byte-aligned rows, read only)
; For each row r of rightMatrix:
;   out[r] = r.x*row0 + r.y*row1 + r.z*row2 + r.w*row3   (rows of matrix)
; The four matrix rows are cached in xmm0-xmm3 before any store, so the
; in-place update is safe. Only xmm0-xmm5 are used: XMM6/XMM7 are
; callee-saved in the Microsoft x64 convention. Each component is broadcast
; straight from memory with PSHUFD (SSE2, always available on x64).
MatrixMultiply PROC PUBLIC
	movaps	xmm0, XMMWORD PTR [rcx]
	movaps	xmm1, XMMWORD PTR [rcx+16]
	movaps	xmm2, XMMWORD PTR [rcx+32]
	movaps	xmm3, XMMWORD PTR [rcx+48]

	; ---- output row 0 : r00, r01, r02, r03
	pshufd	xmm4, XMMWORD PTR [rdx], 00000000b
	mulps	xmm4, xmm0
	pshufd	xmm5, XMMWORD PTR [rdx], 01010101b
	mulps	xmm5, xmm1
	addps	xmm4, xmm5
	pshufd	xmm5, XMMWORD PTR [rdx], 10101010b
	mulps	xmm5, xmm2
	addps	xmm4, xmm5
	pshufd	xmm5, XMMWORD PTR [rdx], 11111111b
	mulps	xmm5, xmm3
	addps	xmm4, xmm5
	movaps	XMMWORD PTR [rcx], xmm4

	; ---- output row 1 : r04, r05, r06, r07
	pshufd	xmm4, XMMWORD PTR [rdx+16], 00000000b
	mulps	xmm4, xmm0
	pshufd	xmm5, XMMWORD PTR [rdx+16], 01010101b
	mulps	xmm5, xmm1
	addps	xmm4, xmm5
	pshufd	xmm5, XMMWORD PTR [rdx+16], 10101010b
	mulps	xmm5, xmm2
	addps	xmm4, xmm5
	pshufd	xmm5, XMMWORD PTR [rdx+16], 11111111b
	mulps	xmm5, xmm3
	addps	xmm4, xmm5
	movaps	XMMWORD PTR [rcx+16], xmm4

	; ---- output row 2 : r08, r09, r10, r11
	pshufd	xmm4, XMMWORD PTR [rdx+32], 00000000b
	mulps	xmm4, xmm0
	pshufd	xmm5, XMMWORD PTR [rdx+32], 01010101b
	mulps	xmm5, xmm1
	addps	xmm4, xmm5
	pshufd	xmm5, XMMWORD PTR [rdx+32], 10101010b
	mulps	xmm5, xmm2
	addps	xmm4, xmm5
	pshufd	xmm5, XMMWORD PTR [rdx+32], 11111111b
	mulps	xmm5, xmm3
	addps	xmm4, xmm5
	movaps	XMMWORD PTR [rcx+32], xmm4

	; ---- output row 3 : r12, r13, r14, r15
	pshufd	xmm4, XMMWORD PTR [rdx+48], 00000000b
	mulps	xmm4, xmm0
	pshufd	xmm5, XMMWORD PTR [rdx+48], 01010101b
	mulps	xmm5, xmm1
	addps	xmm4, xmm5
	pshufd	xmm5, XMMWORD PTR [rdx+48], 10101010b
	mulps	xmm5, xmm2
	addps	xmm4, xmm5
	pshufd	xmm5, XMMWORD PTR [rdx+48], 11111111b
	mulps	xmm5, xmm3
	addps	xmm4, xmm5
	movaps	XMMWORD PTR [rcx+48], xmm4
	ret	0
MatrixMultiply ENDP
; MatrixTranslate: row3 = x*row0 + y*row1 + z*row2 + row3, stored over row3.
;   rcx -> matrix (four 16-byte-aligned rows; only row3 is written)
;   rdx -> translation (16-byte aligned; a full 16 bytes are loaded, the
;          fourth float is ignored)
; Only xmm0-xmm2 are touched; XMM6/XMM7 are callee-saved in the Microsoft
; x64 convention and must not be clobbered by a leaf routine.
MatrixTranslate PROC PUBLIC
	movaps	xmm0, XMMWORD PTR [rdx]
	movaps	xmm1, xmm0
	movaps	xmm2, xmm0
	shufps	xmm0, xmm0, 00000000b		; x x x x
	shufps	xmm1, xmm1, 01010101b		; y y y y
	shufps	xmm2, xmm2, 10101010b		; z z z z
	mulps	xmm0, XMMWORD PTR [rcx]
	mulps	xmm1, XMMWORD PTR [rcx+16]
	mulps	xmm2, XMMWORD PTR [rcx+32]
	addps	xmm0, xmm1
	addps	xmm0, xmm2
	addps	xmm0, XMMWORD PTR [rcx+48]
	movaps	XMMWORD PTR [rcx+48], xmm0
	ret	0
MatrixTranslate ENDP
; MatrixScale: row0 *= x, row1 *= y, row2 *= z; row3 is left untouched.
;   rcx -> matrix (16-byte-aligned rows)
;   rdx -> scale factors (16-byte aligned; a full 16 bytes are loaded, the
;          fourth float is ignored)
; Only xmm0-xmm2 are touched; XMM6 is callee-saved in the Microsoft x64
; convention and must not be clobbered by a leaf routine.
MatrixScale PROC PUBLIC
	movaps	xmm0, XMMWORD PTR [rdx]
	movaps	xmm1, xmm0
	movaps	xmm2, xmm0
	shufps	xmm0, xmm0, 00000000b		; x x x x
	shufps	xmm1, xmm1, 01010101b		; y y y y
	shufps	xmm2, xmm2, 10101010b		; z z z z
	mulps	xmm0, XMMWORD PTR [rcx]
	mulps	xmm1, XMMWORD PTR [rcx+16]
	mulps	xmm2, XMMWORD PTR [rcx+32]
	movaps	XMMWORD PTR [rcx], xmm0
	movaps	XMMWORD PTR [rcx+16], xmm1
	movaps	XMMWORD PTR [rcx+32], xmm2
	ret	0
MatrixScale ENDP
end
;
; Copyright (C) 2006 yopyop
; Copyright (C) 2008 CrazyMax
;
; This file is part of DeSmuME
;
; DeSmuME is free software; you can redistribute it and/or modify
; it under the terms of the GNU General Public License as published by
; the Free Software Foundation; either version 2 of the License, or
; (at your option) any later version.
;
; DeSmuME is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
; GNU General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with DeSmuME; if not, write to the Free Software
; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
TITLE matrix_sse2-x64.asm
.code
; MatrixMultVec4x4: vec = x*row0 + y*row1 + z*row2 + w*row3, stored over vec.
;   rcx -> matrix (four 16-byte-aligned rows of 4 floats)
;   rdx -> vector (16-byte aligned, 4 floats); all four lanes are read and
;          rewritten, so the caller must supply a meaningful w
; Only xmm0-xmm3 are touched; XMM6/XMM7 are callee-saved in the Microsoft
; x64 convention and must not be clobbered by a leaf routine.
MatrixMultVec4x4 PROC PUBLIC
	movaps	xmm0, XMMWORD PTR [rdx]
	movaps	xmm1, xmm0
	movaps	xmm2, xmm0
	movaps	xmm3, xmm0
	shufps	xmm0, xmm0, 00000000b		; x x x x
	shufps	xmm1, xmm1, 01010101b		; y y y y
	shufps	xmm2, xmm2, 10101010b		; z z z z
	shufps	xmm3, xmm3, 11111111b		; w w w w
	mulps	xmm0, XMMWORD PTR [rcx]
	mulps	xmm1, XMMWORD PTR [rcx+16]
	mulps	xmm2, XMMWORD PTR [rcx+32]
	mulps	xmm3, XMMWORD PTR [rcx+48]
	addps	xmm0, xmm1
	addps	xmm0, xmm2
	addps	xmm0, xmm3
	movaps	XMMWORD PTR [rdx], xmm0
	ret	0
MatrixMultVec4x4 ENDP
; MatrixMultVec3x3: vec = x*row0 + y*row1 + z*row2, stored over vec.
;   rcx -> matrix (16-byte-aligned rows; only the first three are read)
;   rdx -> vector (16-byte aligned, 4 floats); all four lanes are rewritten
; Fixes: the routine previously had no RET and fell through into the next
; procedure, and it clobbered the callee-saved XMM6/XMM7 registers.
MatrixMultVec3x3 PROC PUBLIC
	movaps	xmm0, XMMWORD PTR [rdx]
	movaps	xmm1, xmm0
	movaps	xmm2, xmm0
	shufps	xmm0, xmm0, 00000000b		; x x x x
	shufps	xmm1, xmm1, 01010101b		; y y y y
	shufps	xmm2, xmm2, 10101010b		; z z z z
	mulps	xmm0, XMMWORD PTR [rcx]
	mulps	xmm1, XMMWORD PTR [rcx+16]
	mulps	xmm2, XMMWORD PTR [rcx+32]
	addps	xmm0, xmm1
	addps	xmm0, xmm2
	movaps	XMMWORD PTR [rdx], xmm0
	ret	0
MatrixMultVec3x3 ENDP
; MatrixMultiply: matrix = matrix * rightMatrix, result stored over matrix.
;   rcx -> matrix      (four 16-byte-aligned rows, read then overwritten)
;   rdx -> rightMatrix (four 16-byte-aligned rows, read only)
; For each row r of rightMatrix:
;   out[r] = r.x*row0 + r.y*row1 + r.z*row2 + r.w*row3   (rows of matrix)
; The four matrix rows are cached in xmm0-xmm3 before any store, so the
; in-place update is safe. Only xmm0-xmm5 are used: XMM6/XMM7 are
; callee-saved in the Microsoft x64 convention. Each component is broadcast
; straight from memory with PSHUFD (SSE2, always available on x64).
MatrixMultiply PROC PUBLIC
	movaps	xmm0, XMMWORD PTR [rcx]
	movaps	xmm1, XMMWORD PTR [rcx+16]
	movaps	xmm2, XMMWORD PTR [rcx+32]
	movaps	xmm3, XMMWORD PTR [rcx+48]

	; ---- output row 0 : r00, r01, r02, r03
	pshufd	xmm4, XMMWORD PTR [rdx], 00000000b
	mulps	xmm4, xmm0
	pshufd	xmm5, XMMWORD PTR [rdx], 01010101b
	mulps	xmm5, xmm1
	addps	xmm4, xmm5
	pshufd	xmm5, XMMWORD PTR [rdx], 10101010b
	mulps	xmm5, xmm2
	addps	xmm4, xmm5
	pshufd	xmm5, XMMWORD PTR [rdx], 11111111b
	mulps	xmm5, xmm3
	addps	xmm4, xmm5
	movaps	XMMWORD PTR [rcx], xmm4

	; ---- output row 1 : r04, r05, r06, r07
	pshufd	xmm4, XMMWORD PTR [rdx+16], 00000000b
	mulps	xmm4, xmm0
	pshufd	xmm5, XMMWORD PTR [rdx+16], 01010101b
	mulps	xmm5, xmm1
	addps	xmm4, xmm5
	pshufd	xmm5, XMMWORD PTR [rdx+16], 10101010b
	mulps	xmm5, xmm2
	addps	xmm4, xmm5
	pshufd	xmm5, XMMWORD PTR [rdx+16], 11111111b
	mulps	xmm5, xmm3
	addps	xmm4, xmm5
	movaps	XMMWORD PTR [rcx+16], xmm4

	; ---- output row 2 : r08, r09, r10, r11
	pshufd	xmm4, XMMWORD PTR [rdx+32], 00000000b
	mulps	xmm4, xmm0
	pshufd	xmm5, XMMWORD PTR [rdx+32], 01010101b
	mulps	xmm5, xmm1
	addps	xmm4, xmm5
	pshufd	xmm5, XMMWORD PTR [rdx+32], 10101010b
	mulps	xmm5, xmm2
	addps	xmm4, xmm5
	pshufd	xmm5, XMMWORD PTR [rdx+32], 11111111b
	mulps	xmm5, xmm3
	addps	xmm4, xmm5
	movaps	XMMWORD PTR [rcx+32], xmm4

	; ---- output row 3 : r12, r13, r14, r15
	pshufd	xmm4, XMMWORD PTR [rdx+48], 00000000b
	mulps	xmm4, xmm0
	pshufd	xmm5, XMMWORD PTR [rdx+48], 01010101b
	mulps	xmm5, xmm1
	addps	xmm4, xmm5
	pshufd	xmm5, XMMWORD PTR [rdx+48], 10101010b
	mulps	xmm5, xmm2
	addps	xmm4, xmm5
	pshufd	xmm5, XMMWORD PTR [rdx+48], 11111111b
	mulps	xmm5, xmm3
	addps	xmm4, xmm5
	movaps	XMMWORD PTR [rcx+48], xmm4
	ret	0
MatrixMultiply ENDP
; MatrixTranslate: row3 = x*row0 + y*row1 + z*row2 + row3, stored over row3.
;   rcx -> matrix (four 16-byte-aligned rows; only row3 is written)
;   rdx -> translation (16-byte aligned; a full 16 bytes are loaded, the
;          fourth float is ignored)
; Only xmm0-xmm2 are touched; XMM6/XMM7 are callee-saved in the Microsoft
; x64 convention and must not be clobbered by a leaf routine.
MatrixTranslate PROC PUBLIC
	movaps	xmm0, XMMWORD PTR [rdx]
	movaps	xmm1, xmm0
	movaps	xmm2, xmm0
	shufps	xmm0, xmm0, 00000000b		; x x x x
	shufps	xmm1, xmm1, 01010101b		; y y y y
	shufps	xmm2, xmm2, 10101010b		; z z z z
	mulps	xmm0, XMMWORD PTR [rcx]
	mulps	xmm1, XMMWORD PTR [rcx+16]
	mulps	xmm2, XMMWORD PTR [rcx+32]
	addps	xmm0, xmm1
	addps	xmm0, xmm2
	addps	xmm0, XMMWORD PTR [rcx+48]
	movaps	XMMWORD PTR [rcx+48], xmm0
	ret	0
MatrixTranslate ENDP
; MatrixScale: row0 *= x, row1 *= y, row2 *= z; row3 is left untouched.
;   rcx -> matrix (16-byte-aligned rows)
;   rdx -> scale factors (16-byte aligned; a full 16 bytes are loaded, the
;          fourth float is ignored)
; Only xmm0-xmm2 are touched; XMM6 is callee-saved in the Microsoft x64
; convention and must not be clobbered by a leaf routine.
MatrixScale PROC PUBLIC
	movaps	xmm0, XMMWORD PTR [rdx]
	movaps	xmm1, xmm0
	movaps	xmm2, xmm0
	shufps	xmm0, xmm0, 00000000b		; x x x x
	shufps	xmm1, xmm1, 01010101b		; y y y y
	shufps	xmm2, xmm2, 10101010b		; z z z z
	mulps	xmm0, XMMWORD PTR [rcx]
	mulps	xmm1, XMMWORD PTR [rcx+16]
	mulps	xmm2, XMMWORD PTR [rcx+32]
	movaps	XMMWORD PTR [rcx], xmm0
	movaps	XMMWORD PTR [rcx+16], xmm1
	movaps	XMMWORD PTR [rcx+32], xmm2
	ret	0
MatrixScale ENDP
end

View File

@ -1,183 +1,185 @@
;
; Copyright (C) 2006 yopyop
; Copyright (C) 2008 CrazyMax
;
; This file is part of DeSmuME
;
; DeSmuME is free software; you can redistribute it and/or modify
; it under the terms of the GNU General Public License as published by
; the Free Software Foundation; either version 2 of the License, or
; (at your option) any later version.
;
; DeSmuME is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
; GNU General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with DeSmuME; if not, write to the Free Software
; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
TITLE matrix_sse2-x86.asm
.686P
.XMM
.model flat
.code
; @MatrixMultVec4x4@8: vec = x*row0 + y*row1 + z*row2 + row3, stored over vec.
;   ecx -> four 16-byte rows (aligned loads), edx -> 4-float vector (aligned)
;   presumably ecx = matrix and edx = vecPtr per __fastcall - confirm against the header
; The fourth input component is not used: row3 is added unscaled.
@MatrixMultVec4x4@8 PROC PUBLIC
; cache the four rows
movaps xmm0, XMMWORD PTR [ecx]
movaps xmm1, XMMWORD PTR [ecx+16]
movaps xmm2, XMMWORD PTR [ecx+32]
movaps xmm3, XMMWORD PTR [ecx+48]
; load the vector and make working copies (the xmm7 copy is never used)
movaps xmm4, XMMWORD PTR [edx]
movaps xmm5, xmm4
movaps xmm6, xmm4
movaps xmm7, xmm4
; broadcast x, y and z across all four lanes
shufps xmm4, xmm4, 00000000b
shufps xmm5, xmm5, 01010101b
shufps xmm6, xmm6, 10101010b
; weight each row by its component and sum
mulps xmm4, xmm0
mulps xmm5, xmm1
mulps xmm6, xmm2
addps xmm4, xmm5
addps xmm4, xmm6
; add row3 as-is
addps xmm4, xmm3
; all four lanes of the vector are overwritten
movaps XMMWORD PTR [edx], xmm4
ret 0
@MatrixMultVec4x4@8 ENDP
; @MatrixMultVec3x3@8: vec = x*row0 + y*row1 + z*row2, stored over vec.
;   ecx -> 16-byte rows (only the first three are read), edx -> 4-float vector
;   presumably ecx = matrix and edx = vecPtr per __fastcall - confirm against the header
@MatrixMultVec3x3@8 PROC PUBLIC
; cache the first three rows
movaps xmm0, XMMWORD PTR [ecx]
movaps xmm1, XMMWORD PTR [ecx+16]
movaps xmm2, XMMWORD PTR [ecx+32]
; load the vector and make working copies (the xmm7 copy is never used)
movaps xmm4, XMMWORD PTR [edx]
movaps xmm5, xmm4
movaps xmm6, xmm4
movaps xmm7, xmm4
; broadcast x, y and z across all four lanes
shufps xmm4, xmm4, 00000000b
shufps xmm5, xmm5, 01010101b
shufps xmm6, xmm6, 10101010b
; weight each row by its component and sum
mulps xmm4, xmm0
mulps xmm5, xmm1
mulps xmm6, xmm2
addps xmm4, xmm5
addps xmm4, xmm6
; the full 16 bytes are stored, so the fourth lane of the vector is overwritten too
movaps XMMWORD PTR [edx], xmm4
ret 0
@MatrixMultVec3x3@8 ENDP
; ---------------------------------------------------------------------------
; MatMulRow rowOfs
; Helper for @MatrixMultiply@8. Expects xmm0..xmm3 to hold the four 4-float
; groups of the matrix at ecx as they were on entry. Reads the four floats
; b0..b3 at [edx+rowOfs] and stores
;     ((b0*xmm0 + b1*xmm1) + b2*xmm2) + b3*xmm3
; to [ecx+rowOfs]. Uses xmm4-xmm7 as scratch.
; ---------------------------------------------------------------------------
MatMulRow MACRO rowOfs
        movaps  xmm4, XMMWORD PTR [edx+rowOfs]
        movaps  xmm5, xmm4
        movaps  xmm6, xmm4
        movaps  xmm7, xmm4
        shufps  xmm4, xmm4, 00000000b           ;; b0 in every lane
        mulps   xmm4, xmm0
        shufps  xmm5, xmm5, 01010101b           ;; b1 in every lane
        mulps   xmm5, xmm1
        addps   xmm4, xmm5
        shufps  xmm6, xmm6, 10101010b           ;; b2 in every lane
        mulps   xmm6, xmm2
        addps   xmm4, xmm6
        shufps  xmm7, xmm7, 11111111b           ;; b3 in every lane
        mulps   xmm7, xmm3
        addps   xmm4, xmm7
        movaps  XMMWORD PTR [ecx+rowOfs], xmm4
ENDM

; ---------------------------------------------------------------------------
; @MatrixMultiply@8
; Multiplies two 16-float matrices and writes the product over the first one.
;   ecx -> matrix A, read then overwritten with the product (16-byte aligned)
;   edx -> matrix B, read only (16-byte aligned)
; The @name@8 decoration is MSVC's __fastcall form (arguments in ecx, edx).
;
; For each 4-float group g of B (byte offsets 0, 16, 32, 48):
;   A_out[g] = B[g][0]*A[0..3] + B[g][1]*A[4..7]
;            + B[g][2]*A[8..11] + B[g][3]*A[12..15]
;
; All of A is cached in xmm0..xmm3 before anything is stored, and each group
; of B is read before the same offset of A is written.
; Clobbers xmm0-xmm7.
; ---------------------------------------------------------------------------
@MatrixMultiply@8 PROC PUBLIC
        movaps  xmm0, XMMWORD PTR [ecx]
        movaps  xmm1, XMMWORD PTR [ecx+16]
        movaps  xmm2, XMMWORD PTR [ecx+32]
        movaps  xmm3, XMMWORD PTR [ecx+48]
        MatMulRow 0                             ; elements 0..3
        MatMulRow 16                            ; elements 4..7
        MatMulRow 32                            ; elements 8..11
        MatMulRow 48                            ; elements 12..15
        ret     0
@MatrixMultiply@8 ENDP
; ---------------------------------------------------------------------------
; @MatrixTranslate@8
; Folds a translation into a 16-float matrix by rewriting its last group.
;   ecx -> matrix, floats 12..15 are overwritten (16-byte aligned)
;   edx -> vector, 4 floats, read only (16-byte aligned)
; The @name@8 decoration is MSVC's __fastcall form (arguments in ecx, edx).
;
;   M[12..15] = v[0]*M[0..3] + v[1]*M[4..7] + v[2]*M[8..11] + M[12..15]
;
; v[3] is never read and M[0..11] are left untouched.
; Clobbers xmm0-xmm6.
; ---------------------------------------------------------------------------
@MatrixTranslate@8 PROC PUBLIC
        movaps  xmm0, XMMWORD PTR [ecx]         ; M[0..3]
        movaps  xmm1, XMMWORD PTR [ecx+16]      ; M[4..7]
        movaps  xmm2, XMMWORD PTR [ecx+32]      ; M[8..11]
        movaps  xmm3, XMMWORD PTR [ecx+48]      ; M[12..15]
        movaps  xmm4, XMMWORD PTR [edx]         ; v[0..3]
        movaps  xmm5, xmm4
        movaps  xmm6, xmm4
        ; Only three working copies are needed; v[3] is not used.
        shufps  xmm4, xmm4, 00000000b           ; broadcast v[0] to all lanes
        shufps  xmm5, xmm5, 01010101b           ; broadcast v[1]
        shufps  xmm6, xmm6, 10101010b           ; broadcast v[2]
        mulps   xmm4, xmm0
        mulps   xmm5, xmm1
        mulps   xmm6, xmm2
        addps   xmm4, xmm5
        addps   xmm4, xmm6
        addps   xmm4, xmm3                      ; + previous M[12..15]
        movaps  XMMWORD PTR [ecx+48], xmm4
        ret     0
@MatrixTranslate@8 ENDP
; ---------------------------------------------------------------------------
; @MatrixScale@8
; Scales the first three 4-float groups of a matrix, in place.
;   ecx -> matrix, floats 0..11 are overwritten (16-byte aligned)
;   edx -> vector, 4 floats, read only (16-byte aligned)
; The @name@8 decoration is MSVC's __fastcall form (arguments in ecx, edx).
;
;   M[0..3]  *= v[0]
;   M[4..7]  *= v[1]
;   M[8..11] *= v[2]
;
; M[12..15] and v[3] are never touched.
; Clobbers xmm0-xmm2 and xmm4-xmm6.
; ---------------------------------------------------------------------------
@MatrixScale@8 PROC PUBLIC
        ; Read the scale factors once, one working copy per factor.
        movaps  xmm4, XMMWORD PTR [edx]
        movaps  xmm5, xmm4
        movaps  xmm6, xmm4

        ; Group 0 scaled by v[0].
        movaps  xmm0, XMMWORD PTR [ecx]
        shufps  xmm4, xmm4, 00000000b
        mulps   xmm4, xmm0

        ; Group 1 scaled by v[1].
        movaps  xmm1, XMMWORD PTR [ecx+16]
        shufps  xmm5, xmm5, 01010101b
        mulps   xmm5, xmm1

        ; Group 2 scaled by v[2].
        movaps  xmm2, XMMWORD PTR [ecx+32]
        shufps  xmm6, xmm6, 10101010b
        mulps   xmm6, xmm2

        ; Write the three scaled groups back.
        movaps  XMMWORD PTR [ecx], xmm4
        movaps  XMMWORD PTR [ecx+16], xmm5
        movaps  XMMWORD PTR [ecx+32], xmm6
        ret     0
@MatrixScale@8 ENDP
end
;
; Copyright (C) 2006 yopyop
; Copyright (C) 2008 CrazyMax
;
; This file is part of DeSmuME
;
; DeSmuME is free software; you can redistribute it and/or modify
; it under the terms of the GNU General Public License as published by
; the Free Software Foundation; either version 2 of the License, or
; (at your option) any later version.
;
; DeSmuME is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
; GNU General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with DeSmuME; if not, write to the Free Software
; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
; Listing title. TITLE consumes the rest of its line as text, so comments for
; it must stay on separate lines.
TITLE matrix_sse2-x86.asm
; Accept the Pentium Pro (P6) instruction set, privileged instructions included.
.686P
; Accept instructions that operate on the XMM registers.
; NOTE(review): the procedures visible in this file only use movaps, shufps,
; mulps and addps, which are SSE (not SSE2) instructions despite the title.
.XMM
; 32-bit flat memory model.
.model flat
; Everything that follows is emitted into the code segment.
.code
; ---------------------------------------------------------------------------
; @MatrixMultVec4x4@8
; Transforms a 4-float vector by a 16-float matrix, in place.
;   ecx -> matrix, 16 floats, read only (16-byte aligned)
;   edx -> vector,  4 floats, overwritten with the result (16-byte aligned)
; The @name@8 decoration is MSVC's __fastcall form (arguments in ecx, edx).
;
;   result = ((v[0]*M[0..3] + v[1]*M[4..7]) + v[2]*M[8..11]) + v[3]*M[12..15]
;
; All four vector components take part, including v[3].
; Clobbers xmm0-xmm7.
; ---------------------------------------------------------------------------
@MatrixMultVec4x4@8 PROC PUBLIC
        ; Read the vector once and keep one working copy per component.
        movaps  xmm4, XMMWORD PTR [edx]
        movaps  xmm5, xmm4
        movaps  xmm6, xmm4
        movaps  xmm7, xmm4

        ; v[0] * M[0..3] starts the running sum in xmm4.
        movaps  xmm0, XMMWORD PTR [ecx]
        shufps  xmm4, xmm4, 00000000b
        mulps   xmm4, xmm0

        ; + v[1] * M[4..7]
        movaps  xmm1, XMMWORD PTR [ecx+16]
        shufps  xmm5, xmm5, 01010101b
        mulps   xmm5, xmm1
        addps   xmm4, xmm5

        ; + v[2] * M[8..11]
        movaps  xmm2, XMMWORD PTR [ecx+32]
        shufps  xmm6, xmm6, 10101010b
        mulps   xmm6, xmm2
        addps   xmm4, xmm6

        ; + v[3] * M[12..15]
        movaps  xmm3, XMMWORD PTR [ecx+48]
        shufps  xmm7, xmm7, 11111111b
        mulps   xmm7, xmm3
        addps   xmm4, xmm7

        movaps  XMMWORD PTR [edx], xmm4
        ret     0
@MatrixMultVec4x4@8 ENDP
; ---------------------------------------------------------------------------
; @MatrixMultVec3x3@8
; Transforms a vector by the first three 4-float groups of a matrix, in place.
;   ecx -> matrix, floats 0..11 are read (16-byte aligned)
;   edx -> vector, 4 floats, overwritten with the result (16-byte aligned)
; The @name@8 decoration is MSVC's __fastcall form (arguments in ecx, edx).
;
;   result = v[0]*M[0..3] + v[1]*M[4..7] + v[2]*M[8..11]
;
; M[12..15] and v[3] are never read. The store writes all four lanes, so v[3]
; is overwritten as well, with v[0]*M[3] + v[1]*M[7] + v[2]*M[11].
; NOTE(review): confirm no caller expects v[3] to survive this call.
; Clobbers xmm0-xmm2 and xmm4-xmm6.
; ---------------------------------------------------------------------------
@MatrixMultVec3x3@8 PROC PUBLIC
        movaps  xmm0, XMMWORD PTR [ecx]         ; M[0..3]
        movaps  xmm1, XMMWORD PTR [ecx+16]      ; M[4..7]
        movaps  xmm2, XMMWORD PTR [ecx+32]      ; M[8..11]
        movaps  xmm4, XMMWORD PTR [edx]         ; v[0..3]
        movaps  xmm5, xmm4
        movaps  xmm6, xmm4
        ; Only three working copies are needed; v[3] is not used.
        shufps  xmm4, xmm4, 00000000b           ; broadcast v[0] to all lanes
        shufps  xmm5, xmm5, 01010101b           ; broadcast v[1]
        shufps  xmm6, xmm6, 10101010b           ; broadcast v[2]
        mulps   xmm4, xmm0
        mulps   xmm5, xmm1
        mulps   xmm6, xmm2
        addps   xmm4, xmm5
        addps   xmm4, xmm6
        movaps  XMMWORD PTR [edx], xmm4
        ret     0
@MatrixMultVec3x3@8 ENDP
; ---------------------------------------------------------------------------
; MatMulRow rowOfs
; Helper for @MatrixMultiply@8. Expects xmm0..xmm3 to hold the four 4-float
; groups of the matrix at ecx as they were on entry. Reads the four floats
; b0..b3 at [edx+rowOfs] and stores
;     ((b0*xmm0 + b1*xmm1) + b2*xmm2) + b3*xmm3
; to [ecx+rowOfs]. Uses xmm4-xmm7 as scratch.
; ---------------------------------------------------------------------------
MatMulRow MACRO rowOfs
        movaps  xmm4, XMMWORD PTR [edx+rowOfs]
        movaps  xmm5, xmm4
        movaps  xmm6, xmm4
        movaps  xmm7, xmm4
        shufps  xmm4, xmm4, 00000000b           ;; b0 in every lane
        mulps   xmm4, xmm0
        shufps  xmm5, xmm5, 01010101b           ;; b1 in every lane
        mulps   xmm5, xmm1
        addps   xmm4, xmm5
        shufps  xmm6, xmm6, 10101010b           ;; b2 in every lane
        mulps   xmm6, xmm2
        addps   xmm4, xmm6
        shufps  xmm7, xmm7, 11111111b           ;; b3 in every lane
        mulps   xmm7, xmm3
        addps   xmm4, xmm7
        movaps  XMMWORD PTR [ecx+rowOfs], xmm4
ENDM

; ---------------------------------------------------------------------------
; @MatrixMultiply@8
; Multiplies two 16-float matrices and writes the product over the first one.
;   ecx -> matrix A, read then overwritten with the product (16-byte aligned)
;   edx -> matrix B, read only (16-byte aligned)
; The @name@8 decoration is MSVC's __fastcall form (arguments in ecx, edx).
;
; For each 4-float group g of B (byte offsets 0, 16, 32, 48):
;   A_out[g] = B[g][0]*A[0..3] + B[g][1]*A[4..7]
;            + B[g][2]*A[8..11] + B[g][3]*A[12..15]
;
; All of A is cached in xmm0..xmm3 before anything is stored, and each group
; of B is read before the same offset of A is written.
; Clobbers xmm0-xmm7.
; ---------------------------------------------------------------------------
@MatrixMultiply@8 PROC PUBLIC
        movaps  xmm0, XMMWORD PTR [ecx]
        movaps  xmm1, XMMWORD PTR [ecx+16]
        movaps  xmm2, XMMWORD PTR [ecx+32]
        movaps  xmm3, XMMWORD PTR [ecx+48]
        MatMulRow 0                             ; elements 0..3
        MatMulRow 16                            ; elements 4..7
        MatMulRow 32                            ; elements 8..11
        MatMulRow 48                            ; elements 12..15
        ret     0
@MatrixMultiply@8 ENDP
; ---------------------------------------------------------------------------
; @MatrixTranslate@8
; Folds a translation into a 16-float matrix by rewriting its last group.
;   ecx -> matrix, floats 12..15 are overwritten (16-byte aligned)
;   edx -> vector, 4 floats, read only (16-byte aligned)
; The @name@8 decoration is MSVC's __fastcall form (arguments in ecx, edx).
;
;   M[12..15] = v[0]*M[0..3] + v[1]*M[4..7] + v[2]*M[8..11] + M[12..15]
;
; v[3] is never read and M[0..11] are left untouched.
; Clobbers xmm0-xmm6.
; ---------------------------------------------------------------------------
@MatrixTranslate@8 PROC PUBLIC
        movaps  xmm0, XMMWORD PTR [ecx]         ; M[0..3]
        movaps  xmm1, XMMWORD PTR [ecx+16]      ; M[4..7]
        movaps  xmm2, XMMWORD PTR [ecx+32]      ; M[8..11]
        movaps  xmm3, XMMWORD PTR [ecx+48]      ; M[12..15]
        movaps  xmm4, XMMWORD PTR [edx]         ; v[0..3]
        movaps  xmm5, xmm4
        movaps  xmm6, xmm4
        ; Only three working copies are needed; v[3] is not used.
        shufps  xmm4, xmm4, 00000000b           ; broadcast v[0] to all lanes
        shufps  xmm5, xmm5, 01010101b           ; broadcast v[1]
        shufps  xmm6, xmm6, 10101010b           ; broadcast v[2]
        mulps   xmm4, xmm0
        mulps   xmm5, xmm1
        mulps   xmm6, xmm2
        addps   xmm4, xmm5
        addps   xmm4, xmm6
        addps   xmm4, xmm3                      ; + previous M[12..15]
        movaps  XMMWORD PTR [ecx+48], xmm4
        ret     0
@MatrixTranslate@8 ENDP
; ---------------------------------------------------------------------------
; @MatrixScale@8
; Scales the first three 4-float groups of a matrix, in place.
;   ecx -> matrix, floats 0..11 are overwritten (16-byte aligned)
;   edx -> vector, 4 floats, read only (16-byte aligned)
; The @name@8 decoration is MSVC's __fastcall form (arguments in ecx, edx).
;
;   M[0..3]  *= v[0]
;   M[4..7]  *= v[1]
;   M[8..11] *= v[2]
;
; M[12..15] and v[3] are never touched.
; Clobbers xmm0-xmm2 and xmm4-xmm6.
; ---------------------------------------------------------------------------
@MatrixScale@8 PROC PUBLIC
        ; Read the scale factors once, one working copy per factor.
        movaps  xmm4, XMMWORD PTR [edx]
        movaps  xmm5, xmm4
        movaps  xmm6, xmm4

        ; Group 0 scaled by v[0].
        movaps  xmm0, XMMWORD PTR [ecx]
        shufps  xmm4, xmm4, 00000000b
        mulps   xmm4, xmm0

        ; Group 1 scaled by v[1].
        movaps  xmm1, XMMWORD PTR [ecx+16]
        shufps  xmm5, xmm5, 01010101b
        mulps   xmm5, xmm1

        ; Group 2 scaled by v[2].
        movaps  xmm2, XMMWORD PTR [ecx+32]
        shufps  xmm6, xmm6, 10101010b
        mulps   xmm6, xmm2

        ; Write the three scaled groups back.
        movaps  XMMWORD PTR [ecx], xmm4
        movaps  XMMWORD PTR [ecx+16], xmm5
        movaps  XMMWORD PTR [ecx+32], xmm6
        ret     0
@MatrixScale@8 ENDP
end

File diff suppressed because it is too large Load Diff