Fixed SSE2 build

This commit is contained in:
mtabachenko 2008-08-03 17:48:31 +00:00
parent 8ab5d75375
commit e0581144d9
4 changed files with 397 additions and 4 deletions

View File

@ -0,0 +1,178 @@
;
; Copyright (C) 2006 yopyop
; Copyright (C) 2008 CrazyMax
;
; This file is part of DeSmuME
;
; DeSmuME is free software; you can redistribute it and/or modify
; it under the terms of the GNU General Public License as published by
; the Free Software Foundation; either version 2 of the License, or
; (at your option) any later version.
;
; DeSmuME is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
; GNU General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with DeSmuME; if not, write to the Free Software
; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
TITLE matrix_sse2-x64.asm
.code
;-----------------------------------------------------------------------
; MatrixMultVec4x4 - transform a vector in place by a 4x4 float matrix.
; ABI:   Microsoft x64, leaf function, no stack usage
; In:    rcx = matrix, read as four 16-byte rows at +0, +16, +32, +48
;        rdx = vector, four floats (x, y, z, w)
; Out:   [rdx] = x*row0 + y*row1 + z*row2 + row3
;        (the vector's w lane is never used as a factor)
; Clobb: xmm0-xmm5 only. xmm6-xmm15 are callee-saved in this ABI, so
;        they are deliberately left untouched.
; Note:  movaps faults unless both pointers are 16-byte aligned.
;-----------------------------------------------------------------------
MatrixMultVec4x4 PROC PUBLIC
        movaps  xmm0, XMMWORD PTR [rcx]         ; row0
        movaps  xmm1, XMMWORD PTR [rcx+16]      ; row1
        movaps  xmm2, XMMWORD PTR [rcx+32]      ; row2
        movaps  xmm3, XMMWORD PTR [rdx]         ; x y z w
        movaps  xmm4, xmm3
        movaps  xmm5, xmm3
        shufps  xmm3, xmm3, 00000000b           ; x x x x
        shufps  xmm4, xmm4, 01010101b           ; y y y y
        shufps  xmm5, xmm5, 10101010b           ; z z z z
        mulps   xmm3, xmm0                      ; x*row0
        mulps   xmm4, xmm1                      ; y*row1
        mulps   xmm5, xmm2                      ; z*row2
        addps   xmm3, xmm4
        addps   xmm3, xmm5
        addps   xmm3, XMMWORD PTR [rcx+48]      ; + row3, read before the store below
        movaps  XMMWORD PTR [rdx], xmm3
        ret     0
MatrixMultVec4x4 ENDP
;-----------------------------------------------------------------------
; MatrixMultVec3x3 - transform a vector in place by the first three
;                    rows of a float matrix (no row3 term).
; ABI:   Microsoft x64, leaf function, no stack usage
; In:    rcx = matrix, read as three 16-byte rows at +0, +16, +32
;        rdx = vector, four floats (x, y, z, w)
; Out:   [rdx] = x*row0 + y*row1 + z*row2   (all four lanes are stored)
; Clobb: xmm0-xmm5 only. xmm6-xmm15 are callee-saved in this ABI.
; Note:  movaps faults unless both pointers are 16-byte aligned.
;-----------------------------------------------------------------------
MatrixMultVec3x3 PROC PUBLIC
        movaps  xmm0, XMMWORD PTR [rcx]         ; row0
        movaps  xmm1, XMMWORD PTR [rcx+16]      ; row1
        movaps  xmm2, XMMWORD PTR [rcx+32]      ; row2
        movaps  xmm3, XMMWORD PTR [rdx]         ; x y z w
        movaps  xmm4, xmm3
        movaps  xmm5, xmm3
        shufps  xmm3, xmm3, 00000000b           ; x x x x
        shufps  xmm4, xmm4, 01010101b           ; y y y y
        shufps  xmm5, xmm5, 10101010b           ; z z z z
        mulps   xmm3, xmm0                      ; x*row0
        mulps   xmm4, xmm1                      ; y*row1
        mulps   xmm5, xmm2                      ; z*row2
        addps   xmm3, xmm4
        addps   xmm3, xmm5
        movaps  XMMWORD PTR [rdx], xmm3
        ret     0                               ; required: ENDP emits no return, so without it
                                                ; execution runs on into the next procedure
MatrixMultVec3x3 ENDP
;-----------------------------------------------------------------------
; MATMUL_ROW64 rowOfs - compute one 16-byte output row of MatrixMultiply.
; Expects xmm0-xmm3 = the four original rows of the matrix at [rcx].
; Reads the row (a, b, c, d) at [rdx+rowOfs] and stores
;     ((a*row0 + b*row1) + c*row2) + d*row3
; to [rcx+rowOfs]. The source row is re-read for each broadcast so that
; only two scratch registers (xmm4, xmm5) are needed; every read of it
; happens before this row's store.
;-----------------------------------------------------------------------
MATMUL_ROW64 MACRO rowOfs
        movaps  xmm4, XMMWORD PTR [rdx+rowOfs]
        shufps  xmm4, xmm4, 00000000b           ; a a a a
        mulps   xmm4, xmm0                      ; acc = a*row0
        movaps  xmm5, XMMWORD PTR [rdx+rowOfs]
        shufps  xmm5, xmm5, 01010101b           ; b b b b
        mulps   xmm5, xmm1
        addps   xmm4, xmm5                      ; acc += b*row1
        movaps  xmm5, XMMWORD PTR [rdx+rowOfs]
        shufps  xmm5, xmm5, 10101010b           ; c c c c
        mulps   xmm5, xmm2
        addps   xmm4, xmm5                      ; acc += c*row2
        movaps  xmm5, XMMWORD PTR [rdx+rowOfs]
        shufps  xmm5, xmm5, 11111111b           ; d d d d
        mulps   xmm5, xmm3
        addps   xmm4, xmm5                      ; acc += d*row3
        movaps  XMMWORD PTR [rcx+rowOfs], xmm4
ENDM

;-----------------------------------------------------------------------
; MatrixMultiply - 4x4 float matrix product, result written over [rcx].
; ABI:   Microsoft x64, leaf function, no stack usage
; In:    rcx = destination/left matrix, four 16-byte rows
;        rdx = right-hand matrix, four 16-byte rows
; Out:   for each i in 0..3:
;          [rcx+16*i] = sum over j of [rdx+16*i].lane(j) * original [rcx+16*j]
; Clobb: xmm0-xmm5 only. xmm6-xmm15 are callee-saved in this ABI, so
;        the broadcasts are done in two scratch registers instead of
;        spilling into xmm6/xmm7.
; Note:  movaps faults unless both pointers are 16-byte aligned.
;-----------------------------------------------------------------------
MatrixMultiply PROC PUBLIC
        ; Cache all four rows of [rcx] up front: they are overwritten
        ; one by one as each output row is stored.
        movaps  xmm0, XMMWORD PTR [rcx]
        movaps  xmm1, XMMWORD PTR [rcx+16]
        movaps  xmm2, XMMWORD PTR [rcx+32]
        movaps  xmm3, XMMWORD PTR [rcx+48]

        MATMUL_ROW64 0                          ; r00, r01, r02, r03
        MATMUL_ROW64 16                         ; r04, r05, r06, r07
        MATMUL_ROW64 32                         ; r08, r09, r10, r11
        MATMUL_ROW64 48                         ; r12, r13, r14, r15
        ret     0
MatrixMultiply ENDP
;-----------------------------------------------------------------------
; MatrixTranslate - fold a translation vector into row3 of a matrix.
; ABI:   Microsoft x64, leaf function, no stack usage
; In:    rcx = matrix, four 16-byte rows at +0, +16, +32, +48
;        rdx = vector, four floats (x, y, z, w)
; Out:   [rcx+48] = x*row0 + y*row1 + z*row2 + row3
;        (rows 0-2 are not written; the vector's w lane is not used)
; Clobb: xmm0-xmm5 only. xmm6-xmm15 are callee-saved in this ABI.
; Note:  movaps faults unless both pointers are 16-byte aligned.
;-----------------------------------------------------------------------
MatrixTranslate PROC PUBLIC
        movaps  xmm0, XMMWORD PTR [rcx]         ; row0
        movaps  xmm1, XMMWORD PTR [rcx+16]      ; row1
        movaps  xmm2, XMMWORD PTR [rcx+32]      ; row2
        movaps  xmm3, XMMWORD PTR [rdx]         ; x y z w
        movaps  xmm4, xmm3
        movaps  xmm5, xmm3
        shufps  xmm3, xmm3, 00000000b           ; x x x x
        shufps  xmm4, xmm4, 01010101b           ; y y y y
        shufps  xmm5, xmm5, 10101010b           ; z z z z
        mulps   xmm3, xmm0                      ; x*row0
        mulps   xmm4, xmm1                      ; y*row1
        mulps   xmm5, xmm2                      ; z*row2
        addps   xmm3, xmm4
        addps   xmm3, xmm5
        addps   xmm3, XMMWORD PTR [rcx+48]      ; + old row3
        movaps  XMMWORD PTR [rcx+48], xmm3      ; new row3
        ret     0
MatrixTranslate ENDP
;-----------------------------------------------------------------------
; MatrixScale - scale the first three rows of a matrix in place.
; ABI:   Microsoft x64, leaf function, no stack usage
; In:    rcx = matrix, 16-byte rows; rows at +0, +16, +32 are rewritten
;        rdx = scale vector (x, y, z, ...); a full 16 bytes are loaded
; Out:   [rcx]    = x * row0
;        [rcx+16] = y * row1
;        [rcx+32] = z * row2
;        ([rcx+48] is neither read nor written)
; Clobb: xmm0-xmm5 only. xmm6-xmm15 are callee-saved in this ABI.
; Note:  movaps faults unless both pointers are 16-byte aligned.
;-----------------------------------------------------------------------
MatrixScale PROC PUBLIC
        movaps  xmm0, XMMWORD PTR [rcx]         ; row0
        movaps  xmm1, XMMWORD PTR [rcx+16]      ; row1
        movaps  xmm2, XMMWORD PTR [rcx+32]      ; row2
        movaps  xmm3, XMMWORD PTR [rdx]         ; x y z -
        movaps  xmm4, xmm3
        movaps  xmm5, xmm3
        shufps  xmm3, xmm3, 00000000b           ; x x x x
        shufps  xmm4, xmm4, 01010101b           ; y y y y
        shufps  xmm5, xmm5, 10101010b           ; z z z z
        mulps   xmm3, xmm0
        mulps   xmm4, xmm1
        mulps   xmm5, xmm2
        movaps  XMMWORD PTR [rcx], xmm3
        movaps  XMMWORD PTR [rcx+16], xmm4
        movaps  XMMWORD PTR [rcx+32], xmm5
        ret     0
MatrixScale ENDP
end

View File

@ -0,0 +1,215 @@
;
; Copyright (C) 2006 yopyop
; Copyright (C) 2008 CrazyMax
;
; This file is part of DeSmuME
;
; DeSmuME is free software; you can redistribute it and/or modify
; it under the terms of the GNU General Public License as published by
; the Free Software Foundation; either version 2 of the License, or
; (at your option) any later version.
;
; DeSmuME is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
; GNU General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with DeSmuME; if not, write to the Free Software
; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
TITLE matrix_sse2-x86.asm
.686P
.XMM
.model flat
.code
;-----------------------------------------------------------------------
; @MatrixMultVec4x4@8 - transform a vector in place by a 4x4 float matrix.
; In:    ecx = matrix, read as four 16-byte rows at +0, +16, +32, +48
;        edx = vector, four floats (x, y, z, w)
; Out:   [edx] = x*row0 + y*row1 + z*row2 + row3
;        (the vector's w lane is never used as a factor)
; Clobb: xmm0-xmm6
; Stack: standard ebp frame, no locals; returns with ret 0
; Note:  movaps faults unless both pointers are 16-byte aligned.
;-----------------------------------------------------------------------
@MatrixMultVec4x4@8 PROC PUBLIC
        push    ebp
        mov     ebp, esp
        movaps  xmm0, XMMWORD PTR [ecx]         ; row0
        movaps  xmm1, XMMWORD PTR [ecx+16]      ; row1
        movaps  xmm2, XMMWORD PTR [ecx+32]      ; row2
        movaps  xmm3, XMMWORD PTR [ecx+48]      ; row3
        movaps  xmm4, XMMWORD PTR [edx]         ; x y z w
        movaps  xmm5, xmm4
        movaps  xmm6, xmm4
        shufps  xmm4, xmm4, 00000000b           ; x x x x
        shufps  xmm5, xmm5, 01010101b           ; y y y y
        shufps  xmm6, xmm6, 10101010b           ; z z z z
        mulps   xmm4, xmm0                      ; x*row0
        mulps   xmm5, xmm1                      ; y*row1
        mulps   xmm6, xmm2                      ; z*row2
        addps   xmm4, xmm5
        addps   xmm4, xmm6
        addps   xmm4, xmm3                      ; + row3
        movaps  XMMWORD PTR [edx], xmm4
        mov     esp, ebp
        pop     ebp
        ret     0
@MatrixMultVec4x4@8 ENDP
;-----------------------------------------------------------------------
; @MatrixMultVec3x3@8 - transform a vector in place by the first three
;                       rows of a float matrix (no row3 term).
; In:    ecx = matrix, read as three 16-byte rows at +0, +16, +32
;        edx = vector, four floats (x, y, z, w)
; Out:   [edx] = x*row0 + y*row1 + z*row2   (all four lanes are stored)
; Clobb: xmm0-xmm2, xmm4-xmm6
; Stack: standard ebp frame, no locals; returns with ret 0
; Note:  movaps faults unless both pointers are 16-byte aligned.
;-----------------------------------------------------------------------
PUBLIC @MatrixMultVec3x3@8
@MatrixMultVec3x3@8 PROC
        push    ebp
        mov     ebp, esp
        movaps  xmm0, XMMWORD PTR [ecx]         ; row0
        movaps  xmm1, XMMWORD PTR [ecx+16]      ; row1
        movaps  xmm2, XMMWORD PTR [ecx+32]      ; row2
        movaps  xmm4, XMMWORD PTR [edx]         ; x y z w
        movaps  xmm5, xmm4
        movaps  xmm6, xmm4
        shufps  xmm4, xmm4, 00000000b           ; x x x x
        shufps  xmm5, xmm5, 01010101b           ; y y y y
        shufps  xmm6, xmm6, 10101010b           ; z z z z
        mulps   xmm4, xmm0                      ; x*row0
        mulps   xmm5, xmm1                      ; y*row1
        mulps   xmm6, xmm2                      ; z*row2
        addps   xmm4, xmm5
        addps   xmm4, xmm6
        movaps  XMMWORD PTR [edx], xmm4
        mov     esp, ebp
        pop     ebp
        ret     0
@MatrixMultVec3x3@8 ENDP
;-----------------------------------------------------------------------
; MATMUL_ROW32 rowOfs - compute one 16-byte output row of @MatrixMultiply@8.
; Expects xmm0-xmm3 = the four original rows of the matrix at [ecx].
; Loads the row (a, b, c, d) at [edx+rowOfs] and stores
;     ((a*row0 + b*row1) + c*row2) + d*row3
; to [ecx+rowOfs]. Uses xmm4-xmm7 as scratch.
;-----------------------------------------------------------------------
MATMUL_ROW32 MACRO rowOfs
        movaps  xmm4, XMMWORD PTR [edx+rowOfs]
        movaps  xmm5, xmm4
        movaps  xmm6, xmm4
        movaps  xmm7, xmm4
        shufps  xmm4, xmm4, 00000000b           ; a a a a
        shufps  xmm5, xmm5, 01010101b           ; b b b b
        shufps  xmm6, xmm6, 10101010b           ; c c c c
        shufps  xmm7, xmm7, 11111111b           ; d d d d
        mulps   xmm4, xmm0
        mulps   xmm5, xmm1
        mulps   xmm6, xmm2
        mulps   xmm7, xmm3
        addps   xmm4, xmm5
        addps   xmm4, xmm6
        addps   xmm4, xmm7
        movaps  XMMWORD PTR [ecx+rowOfs], xmm4
ENDM

;-----------------------------------------------------------------------
; @MatrixMultiply@8 - 4x4 float matrix product, result written over [ecx].
; In:    ecx = destination/left matrix, four 16-byte rows
;        edx = right-hand matrix, four 16-byte rows
; Out:   for each i in 0..3:
;          [ecx+16*i] = sum over j of [edx+16*i].lane(j) * original [ecx+16*j]
; Clobb: xmm0-xmm7
; Stack: standard ebp frame, no locals; returns with ret 0
; Note:  movaps faults unless both pointers are 16-byte aligned.
;-----------------------------------------------------------------------
PUBLIC @MatrixMultiply@8
@MatrixMultiply@8 PROC
        push    ebp
        mov     ebp, esp

        ; Cache all four rows of [ecx] up front: they are overwritten
        ; one by one as each output row is stored.
        movaps  xmm0, XMMWORD PTR [ecx]
        movaps  xmm1, XMMWORD PTR [ecx+16]
        movaps  xmm2, XMMWORD PTR [ecx+32]
        movaps  xmm3, XMMWORD PTR [ecx+48]

        MATMUL_ROW32 0                          ; r00, r01, r02, r03
        MATMUL_ROW32 16                         ; r04, r05, r06, r07
        MATMUL_ROW32 32                         ; r08, r09, r10, r11
        MATMUL_ROW32 48                         ; r12, r13, r14, r15

        leave                                   ; = mov esp, ebp / pop ebp
        ret     0
@MatrixMultiply@8 ENDP
;-----------------------------------------------------------------------
; @MatrixTranslate@8 - fold a translation vector into row3 of a matrix.
; In:    ecx = matrix, four 16-byte rows at +0, +16, +32, +48
;        edx = vector, four floats (x, y, z, w)
; Out:   [ecx+48] = x*row0 + y*row1 + z*row2 + row3
;        (rows 0-2 are not written; the vector's w lane is not used)
; Clobb: xmm0-xmm6
; Stack: standard ebp frame, no locals; returns with ret 0
; Note:  movaps faults unless both pointers are 16-byte aligned.
;-----------------------------------------------------------------------
PUBLIC @MatrixTranslate@8
@MatrixTranslate@8 PROC
        push    ebp
        mov     ebp, esp
        movaps  xmm0, XMMWORD PTR [ecx]         ; row0
        movaps  xmm1, XMMWORD PTR [ecx+16]      ; row1
        movaps  xmm2, XMMWORD PTR [ecx+32]      ; row2
        movaps  xmm3, XMMWORD PTR [ecx+48]      ; row3 (old translation row)
        movaps  xmm4, XMMWORD PTR [edx]         ; x y z w
        movaps  xmm5, xmm4
        movaps  xmm6, xmm4
        shufps  xmm4, xmm4, 00000000b           ; x x x x
        shufps  xmm5, xmm5, 01010101b           ; y y y y
        shufps  xmm6, xmm6, 10101010b           ; z z z z
        mulps   xmm4, xmm0                      ; x*row0
        mulps   xmm5, xmm1                      ; y*row1
        mulps   xmm6, xmm2                      ; z*row2
        addps   xmm4, xmm5
        addps   xmm4, xmm6
        addps   xmm4, xmm3                      ; + old row3
        movaps  XMMWORD PTR [ecx+48], xmm4      ; new row3
        mov     esp, ebp
        pop     ebp
        ret     0
@MatrixTranslate@8 ENDP
;-----------------------------------------------------------------------
; @MatrixScale@8 - scale the first three rows of a matrix in place.
; In:    ecx = matrix, 16-byte rows; rows at +0, +16, +32 are rewritten
;        edx = scale vector (x, y, z, ...); a full 16 bytes are loaded
; Out:   [ecx]    = x * row0
;        [ecx+16] = y * row1
;        [ecx+32] = z * row2
;        ([ecx+48] is neither read nor written)
; Clobb: xmm0-xmm2
; Stack: standard ebp frame, no locals; returns with ret 0
; Note:  movaps and the mulps memory operands fault unless both
;        pointers are 16-byte aligned.
;-----------------------------------------------------------------------
PUBLIC @MatrixScale@8
@MatrixScale@8 PROC
        push    ebp
        mov     ebp, esp

        ; Broadcast each scale factor across its own register.
        movaps  xmm0, XMMWORD PTR [edx]         ; x y z -
        movaps  xmm1, xmm0
        movaps  xmm2, xmm0
        shufps  xmm0, xmm0, 00000000b           ; x x x x
        shufps  xmm1, xmm1, 01010101b           ; y y y y
        shufps  xmm2, xmm2, 10101010b           ; z z z z

        ; Multiply by the rows straight from memory; every row is read
        ; before any row is written back.
        mulps   xmm0, XMMWORD PTR [ecx]
        mulps   xmm1, XMMWORD PTR [ecx+16]
        mulps   xmm2, XMMWORD PTR [ecx+32]

        movaps  XMMWORD PTR [ecx], xmm0
        movaps  XMMWORD PTR [ecx+16], xmm1
        movaps  XMMWORD PTR [ecx+32], xmm2

        leave                                   ; = mov esp, ebp / pop ebp
        ret     0
@MatrixScale@8 ENDP
end

View File

@ -664,7 +664,7 @@
>
</File>
<File
RelativePath=".\matrix_sse2-x64.asm"
RelativePath="..\matrix_sse2-x64.asm"
>
<FileConfiguration
Name="Debug|Win32"
@ -727,7 +727,7 @@
</FileConfiguration>
</File>
<File
RelativePath=".\matrix_sse2-x86.asm"
RelativePath="..\matrix_sse2-x86.asm"
>
<FileConfiguration
Name="Debug|Win32"

View File

@ -663,7 +663,7 @@
>
</File>
<File
RelativePath=".\matrix_sse2-x64.asm"
RelativePath="..\matrix_sse2-x64.asm"
>
<FileConfiguration
Name="Debug|Win32"
@ -726,7 +726,7 @@
</FileConfiguration>
</File>
<File
RelativePath=".\matrix_sse2-x86.asm"
RelativePath="..\matrix_sse2-x86.asm"
>
<FileConfiguration
Name="Debug|Win32"