diff --git a/Source/Glide64/Glide64.vcproj b/Source/Glide64/Glide64.vcproj
index d488e962a..13c16a145 100644
--- a/Source/Glide64/Glide64.vcproj
+++ b/Source/Glide64/Glide64.vcproj
@@ -270,10 +270,6 @@
RelativePath="TexModCI.h"
>
-
-
0x0000000F
+ or ecx,eax
+ shl eax,4
+ or ecx,eax
+
+ mov eax,edx // 0x0F000000 -> 0x00000F00
+ shr eax,16
+ and eax,0x00000F00
+ or ecx,eax
+ shl eax,4
+ or ecx,eax
+
+ mov eax,edx
+ shr eax,4 // 0x00F00000 -> 0x000F0000
+ and eax,0x000F0000
+ or ecx,eax
+ shl eax,4
+ or ecx,eax
+
+ mov eax,edx
+ shl eax,8 // 0x000F0000 -> 0x0F000000
+ and eax,0x0F000000
+ or ecx,eax
+ shl eax,4
+ or ecx,eax
+
+ mov [edi],ecx
+ add edi,4
+ // }
+
+ // 2nd dword {
+ xor ecx,ecx
+ mov eax,edx
+ shr eax,12 // 0x0000F000 -> 0x0000000F
+ and eax,0x0000000F
+ or ecx,eax
+ shl eax,4
+ or ecx,eax
+
+ mov eax,edx // 0x00000F00 -> 0x00000F00
+ and eax,0x00000F00
+ or ecx,eax
+ shl eax,4
+ or ecx,eax
+
+ mov eax,edx
+ shl eax,12 // 0x000000F0 -> 0x000F0000
+ and eax,0x000F0000
+ or ecx,eax
+ shl eax,4
+ or ecx,eax
+
+ shl edx,24 // 0x0000000F -> 0x0F000000
+ and edx,0x0F000000
+ or ecx,edx
+ shl edx,4
+ or ecx,edx
+
+ mov [edi],ecx
+ add edi,4
+ // }
+
+ // * copy
+ mov eax,[esi] // read all 8 pixels
+ bswap eax
+ add esi,4
+ mov edx,eax
+
+ // 1st dword {
+ xor ecx,ecx
+ shr eax,28 // 0xF0000000 -> 0x0000000F
+ or ecx,eax
+ shl eax,4
+ or ecx,eax
+
+ mov eax,edx // 0x0F000000 -> 0x00000F00
+ shr eax,16
+ and eax,0x00000F00
+ or ecx,eax
+ shl eax,4
+ or ecx,eax
+
+ mov eax,edx
+ shr eax,4 // 0x00F00000 -> 0x000F0000
+ and eax,0x000F0000
+ or ecx,eax
+ shl eax,4
+ or ecx,eax
+
+ mov eax,edx
+ shl eax,8 // 0x000F0000 -> 0x0F000000
+ and eax,0x0F000000
+ or ecx,eax
+ shl eax,4
+ or ecx,eax
+
+ mov [edi],ecx
+ add edi,4
+ // }
+
+ // 2nd dword {
+ xor ecx,ecx
+ mov eax,edx
+ shr eax,12 // 0x0000F000 -> 0x0000000F
+ and eax,0x0000000F
+ or ecx,eax
+ shl eax,4
+ or ecx,eax
+
+ mov eax,edx // 0x00000F00 -> 0x00000F00
+ and eax,0x00000F00
+ or ecx,eax
+ shl eax,4
+ or ecx,eax
+
+ mov eax,edx
+ shl eax,12 // 0x000000F0 -> 0x000F0000
+ and eax,0x000F0000
+ or ecx,eax
+ shl eax,4
+ or ecx,eax
+
+ shl edx,24 // 0x0000000F -> 0x0F000000
+ and edx,0x0F000000
+ or ecx,edx
+ shl edx,4
+ or ecx,edx
+
+ mov [edi],ecx
+ add edi,4
+ // }
+ // *
+
+ pop ecx
+ dec ecx
+ jnz x_loop
+
+ pop ecx
+ dec ecx
+ jz near end_y_loop
+ push ecx
+
+ add esi,[line]
+ add edi,[ext]
+
+ mov ecx,[wid_64]
+x_loop_2:
+ push ecx
+
+ mov eax,[esi+4] // read all 8 pixels
+ bswap eax
+ mov edx,eax
+
+ // 1st dword {
+ xor ecx,ecx
+ shr eax,28 // 0xF0000000 -> 0x0000000F
+ or ecx,eax
+ shl eax,4
+ or ecx,eax
+
+ mov eax,edx // 0x0F000000 -> 0x00000F00
+ shr eax,16
+ and eax,0x00000F00
+ or ecx,eax
+ shl eax,4
+ or ecx,eax
+
+ mov eax,edx
+ shr eax,4 // 0x00F00000 -> 0x000F0000
+ and eax,0x000F0000
+ or ecx,eax
+ shl eax,4
+ or ecx,eax
+
+ mov eax,edx
+ shl eax,8 // 0x000F0000 -> 0x0F000000
+ and eax,0x0F000000
+ or ecx,eax
+ shl eax,4
+ or ecx,eax
+
+ mov [edi],ecx
+ add edi,4
+ // }
+
+ // 2nd dword {
+ xor ecx,ecx
+ mov eax,edx
+ shr eax,12 // 0x0000F000 -> 0x0000000F
+ and eax,0x0000000F
+ or ecx,eax
+ shl eax,4
+ or ecx,eax
+
+ mov eax,edx // 0x00000F00 -> 0x00000F00
+ and eax,0x00000F00
+ or ecx,eax
+ shl eax,4
+ or ecx,eax
+
+ mov eax,edx
+ shl eax,12 // 0x000000F0 -> 0x000F0000
+ and eax,0x000F0000
+ or ecx,eax
+ shl eax,4
+ or ecx,eax
+
+ shl edx,24 // 0x0000000F -> 0x0F000000
+ and edx,0x0F000000
+ or ecx,edx
+ shl edx,4
+ or ecx,edx
+
+ mov [edi],ecx
+ add edi,4
+ // }
+
+ // * copy
+ mov eax,[esi] // read all 8 pixels
+ bswap eax
+ add esi,8
+ mov edx,eax
+
+ // 1st dword {
+ xor ecx,ecx
+ shr eax,28 // 0xF0000000 -> 0x0000000F
+ or ecx,eax
+ shl eax,4
+ or ecx,eax
+
+ mov eax,edx // 0x0F000000 -> 0x00000F00
+ shr eax,16
+ and eax,0x00000F00
+ or ecx,eax
+ shl eax,4
+ or ecx,eax
+
+ mov eax,edx
+ shr eax,4 // 0x00F00000 -> 0x000F0000
+ and eax,0x000F0000
+ or ecx,eax
+ shl eax,4
+ or ecx,eax
+
+ mov eax,edx
+ shl eax,8 // 0x000F0000 -> 0x0F000000
+ and eax,0x0F000000
+ or ecx,eax
+ shl eax,4
+ or ecx,eax
+
+ mov [edi],ecx
+ add edi,4
+ // }
+
+ // 2nd dword {
+ xor ecx,ecx
+ mov eax,edx
+ shr eax,12 // 0x0000F000 -> 0x0000000F
+ and eax,0x0000000F
+ or ecx,eax
+ shl eax,4
+ or ecx,eax
+
+ mov eax,edx // 0x00000F00 -> 0x00000F00
+ and eax,0x00000F00
+ or ecx,eax
+ shl eax,4
+ or ecx,eax
+
+ mov eax,edx
+ shl eax,12 // 0x000000F0 -> 0x000F0000
+ and eax,0x000F0000
+ or ecx,eax
+ shl eax,4
+ or ecx,eax
+
+ shl edx,24 // 0x0000000F -> 0x0F000000
+ and edx,0x0F000000
+ or ecx,edx
+ shl edx,4
+ or ecx,edx
+
+ mov [edi],ecx
+ add edi,4
+ // }
+ // *
+
+ pop ecx
+ dec ecx
+ jnz x_loop_2
+
+ add esi,[line]
+ add edi,[ext]
+
+ pop ecx
+ dec ecx
+ jnz y_loop
+
+end_y_loop:
+ pop edi
+ pop esi
+ pop ebx
+ mov esp, ebp
+ pop ebp
+ ret
+ }
+}
//****************************************************************
// Size: 0, Format: 2
diff --git a/Source/Glide64/TexLoad8b.h b/Source/Glide64/TexLoad8b.h
index d0477877b..388e55e7c 100644
--- a/Source/Glide64/TexLoad8b.h
+++ b/Source/Glide64/TexLoad8b.h
@@ -37,10 +37,630 @@
//
//****************************************************************
-extern "C" void asmLoad8bCI (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext, wxUIntPtr pal);
-extern "C" void asmLoad8bIA8 (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext, wxUIntPtr pal);
-extern "C" void asmLoad8bIA4 (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext);
-extern "C" void asmLoad8bI (wxUIntPtr src, int dst, wxUIntPtr wid_64, int height, int line, int ext);
+extern "C" void __declspec(naked) asmLoad8bCI (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext, wxUIntPtr pal)
+{
+ _asm {
+ push ebp
+ mov ebp, esp
+ push ebx
+ push esi
+ push edi
+
+ mov ebx,[pal]
+ mov esi,[src]
+ mov edi,[dst]
+ mov ecx,[height]
+y_loop:
+ push ecx
+ mov ecx,[wid_64]
+x_loop:
+ push ecx
+
+ mov eax,[esi] // read all 4 pixels
+ bswap eax
+ add esi,4
+ mov edx,eax
+
+ // 1st dword output {
+ shr eax,15
+ and eax,0x1FE
+ mov cx,[ebx+eax]
+ ror cx,1
+ shl ecx,16
+
+ mov eax,edx
+ shr eax,23
+ and eax,0x1FE
+ mov cx,[ebx+eax]
+ ror cx,1
+
+ mov [edi],ecx
+ add edi,4
+ // }
+
+ // 2nd dword output {
+ mov eax,edx
+ shl eax,1
+ and eax,0x1FE
+ mov cx,[ebx+eax]
+ ror cx,1
+ shl ecx,16
+
+ shr edx,7
+ and edx,0x1FE
+ mov cx,[ebx+edx]
+ ror cx,1
+
+ mov [edi],ecx
+ add edi,4
+ // }
+
+ // * copy
+ mov eax,[esi] // read all 4 pixels
+ bswap eax
+ add esi,4
+ mov edx,eax
+
+ // 1st dword output {
+ shr eax,15
+ and eax,0x1FE
+ mov cx,[ebx+eax]
+ ror cx,1
+ shl ecx,16
+
+ mov eax,edx
+ shr eax,23
+ and eax,0x1FE
+ mov cx,[ebx+eax]
+ ror cx,1
+
+ mov [edi],ecx
+ add edi,4
+ // }
+
+ // 2nd dword output {
+ mov eax,edx
+ shl eax,1
+ and eax,0x1FE
+ mov cx,[ebx+eax]
+ ror cx,1
+ shl ecx,16
+
+ shr edx,7
+ and edx,0x1FE
+ mov cx,[ebx+edx]
+ ror cx,1
+
+ mov [edi],ecx
+ add edi,4
+ // }
+ // *
+
+ pop ecx
+
+ dec ecx
+ jnz x_loop
+
+ pop ecx
+ dec ecx
+ jz near end_y_loop
+ push ecx
+
+ mov eax,esi
+ add eax,[line]
+ mov esi,[src]
+ sub eax,esi
+ and eax,0x7FF
+ add esi,eax
+ add edi,[ext]
+
+ mov ecx,[wid_64]
+x_loop_2:
+ push ecx
+
+ mov eax,[esi+4] // read all 4 pixels
+ bswap eax
+ mov edx,eax
+
+ // 1st dword output {
+ shr eax,15
+ and eax,0x1FE
+ mov cx,[ebx+eax]
+ ror cx,1
+ shl ecx,16
+
+ mov eax,edx
+ shr eax,23
+ and eax,0x1FE
+ mov cx,[ebx+eax]
+ ror cx,1
+
+ mov [edi],ecx
+ add edi,4
+ // }
+
+ // 2nd dword output {
+ mov eax,edx
+ shl eax,1
+ and eax,0x1FE
+ mov cx,[ebx+eax]
+ ror cx,1
+ shl ecx,16
+
+ shr edx,7
+ and edx,0x1FE
+ mov cx,[ebx+edx]
+ ror cx,1
+
+ mov [edi],ecx
+ add edi,4
+ // }
+
+ // * copy
+ mov eax,[esi] // read all 4 pixels
+ bswap eax
+ mov edx,esi
+ add edx,8
+ mov esi,[src]
+ sub edx,esi
+ and edx,0x7FF
+ add esi,edx
+ mov edx,eax
+
+ // 1st dword output {
+ shr eax,15
+ and eax,0x1FE
+ mov cx,[ebx+eax]
+ ror cx,1
+ shl ecx,16
+
+ mov eax,edx
+ shr eax,23
+ and eax,0x1FE
+ mov cx,[ebx+eax]
+ ror cx,1
+
+ mov [edi],ecx
+ add edi,4
+ // }
+
+ // 2nd dword output {
+ mov eax,edx
+ shl eax,1
+ and eax,0x1FE
+ mov cx,[ebx+eax]
+ ror cx,1
+ shl ecx,16
+
+ shr edx,7
+ and edx,0x1FE
+ mov cx,[ebx+edx]
+ ror cx,1
+
+ mov [edi],ecx
+ add edi,4
+ // }
+ // *
+
+ pop ecx
+
+ dec ecx
+ jnz x_loop_2
+
+ mov eax,esi
+ add eax,[line]
+ mov esi,[src]
+ sub eax,esi
+ and eax,0x7FF
+ add esi,eax
+ add edi,[ext]
+
+ pop ecx
+ dec ecx
+ jnz y_loop
+
+end_y_loop:
+ pop edi
+ pop esi
+ pop ebx
+ mov esp, ebp
+ pop ebp
+ ret
+ }
+}
+
+extern "C" void __declspec(naked) asmLoad8bIA8 (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext, wxUIntPtr pal)
+{
+ _asm {
+ push ebp
+ mov ebp, esp
+ push ebx
+ push esi
+ push edi
+
+ mov ebx,[pal]
+ mov esi,[src]
+ mov edi,[dst]
+ mov ecx,[height]
+y_loop:
+ push ecx
+ mov ecx,[wid_64]
+x_loop:
+ push ecx
+
+ mov eax,[esi] // read all 4 pixels
+ bswap eax
+ add esi,4
+ mov edx,eax
+
+ // 1st dword output {
+ shr eax,15
+ and eax,0x1FE
+ mov cx,[ebx+eax]
+ ror cx,8
+ shl ecx,16
+
+ mov eax,edx
+ shr eax,23
+ and eax,0x1FE
+ mov cx,[ebx+eax]
+ ror cx,8
+
+ mov [edi],ecx
+ add edi,4
+ // }
+
+ // 2nd dword output {
+ mov eax,edx
+ shl eax,1
+ and eax,0x1FE
+ mov cx,[ebx+eax]
+ ror cx,8
+ shl ecx,16
+
+ shr edx,7
+ and edx,0x1FE
+ mov cx,[ebx+edx]
+ ror cx,8
+
+ mov [edi],ecx
+ add edi,4
+ // }
+
+ // * copy
+ mov eax,[esi] // read all 4 pixels
+ bswap eax
+ add esi,4
+ mov edx,eax
+
+ // 1st dword output {
+ shr eax,15
+ and eax,0x1FE
+ mov cx,[ebx+eax]
+ ror cx,8
+ shl ecx,16
+
+ mov eax,edx
+ shr eax,23
+ and eax,0x1FE
+ mov cx,[ebx+eax]
+ ror cx,8
+
+ mov [edi],ecx
+ add edi,4
+ // }
+
+ // 2nd dword output {
+ mov eax,edx
+ shl eax,1
+ and eax,0x1FE
+ mov cx,[ebx+eax]
+ ror cx,8
+ shl ecx,16
+
+ shr edx,7
+ and edx,0x1FE
+ mov cx,[ebx+edx]
+ ror cx,8
+
+ mov [edi],ecx
+ add edi,4
+ // }
+ // *
+
+ pop ecx
+
+ dec ecx
+ jnz x_loop
+
+ pop ecx
+ dec ecx
+ jz near end_y_loop
+ push ecx
+
+ add esi,[line]
+ add edi,[ext]
+
+ mov ecx,[wid_64]
+x_loop_2:
+ push ecx
+
+ mov eax,[esi+4] // read all 4 pixels
+ bswap eax
+ mov edx,eax
+
+ // 1st dword output {
+ shr eax,15
+ and eax,0x1FE
+ mov cx,[ebx+eax]
+ ror cx,8
+ shl ecx,16
+
+ mov eax,edx
+ shr eax,23
+ and eax,0x1FE
+ mov cx,[ebx+eax]
+ ror cx,8
+
+ mov [edi],ecx
+ add edi,4
+ // }
+
+ // 2nd dword output {
+ mov eax,edx
+ shl eax,1
+ and eax,0x1FE
+ mov cx,[ebx+eax]
+ ror cx,8
+ shl ecx,16
+
+ shr edx,7
+ and edx,0x1FE
+ mov cx,[ebx+edx]
+ ror cx,8
+
+ mov [edi],ecx
+ add edi,4
+ // }
+
+ // * copy
+ mov eax,[esi] // read all 4 pixels
+ bswap eax
+ add esi,8
+ mov edx,eax
+
+ // 1st dword output {
+ shr eax,15
+ and eax,0x1FE
+ mov cx,[ebx+eax]
+ ror cx,8
+ shl ecx,16
+
+ mov eax,edx
+ shr eax,23
+ and eax,0x1FE
+ mov cx,[ebx+eax]
+ ror cx,8
+
+ mov [edi],ecx
+ add edi,4
+ // }
+
+ // 2nd dword output {
+ mov eax,edx
+ shl eax,1
+ and eax,0x1FE
+ mov cx,[ebx+eax]
+ ror cx,8
+ shl ecx,16
+
+ shr edx,7
+ and edx,0x1FE
+ mov cx,[ebx+edx]
+ ror cx,8
+
+ mov [edi],ecx
+ add edi,4
+ // }
+ // *
+
+ pop ecx
+
+ dec ecx
+ jnz x_loop_2
+
+ add esi,[line]
+ add edi,[ext]
+
+ pop ecx
+ dec ecx
+ jnz y_loop
+
+end_y_loop:
+ pop edi
+ pop esi
+ pop ebx
+ mov esp, ebp
+ pop ebp
+ ret
+ }
+}
+
+extern "C" void __declspec(naked) asmLoad8bIA4 (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext)
+{
+ _asm {
+ push ebp
+ mov ebp, esp
+ push ebx
+ push esi
+ push edi
+
+ mov esi,[src]
+ mov edi,[dst]
+ mov ecx,[height]
+y_loop:
+ push ecx
+ mov ecx,[wid_64]
+x_loop:
+ mov eax,[esi] // read all 4 pixels
+ mov edx,eax
+
+ shr eax,4 //all alpha
+ shl edx,4
+ and eax,0x0F0F0F0F
+ and edx,0xF0F0F0F0
+ add esi,4
+ or eax,edx
+
+ mov [edi],eax // save dword
+ add edi,4
+
+ mov eax,[esi] // read all 4 pixels
+ mov edx,eax
+
+ shr eax,4 //all alpha
+ shl edx,4
+ and eax,0x0F0F0F0F
+ and edx,0xF0F0F0F0
+ add esi,4
+ or eax,edx
+
+ mov [edi],eax // save dword
+ add edi,4
+ // *
+
+ dec ecx
+ jnz x_loop
+
+ pop ecx
+ dec ecx
+ jz end_y_loop
+ push ecx
+
+ add esi,[line]
+ add edi,[ext]
+
+ mov ecx,[wid_64]
+x_loop_2:
+ mov eax,[esi+4] // read both pixels
+ mov edx,eax
+
+ shr eax,4 //all alpha
+ shl edx,4
+ and eax,0x0F0F0F0F
+ and edx,0xF0F0F0F0
+ or eax,edx
+
+ mov [edi],eax //save dword
+ add edi,4
+
+ mov eax,[esi] // read both pixels
+ add esi,8
+ mov edx,eax
+
+ shr eax,4 //all alpha
+ shl edx,4
+ and eax,0x0F0F0F0F
+ and edx,0xF0F0F0F0
+ or eax,edx
+
+ mov [edi],eax //save dword
+ add edi,4
+ // *
+
+ dec ecx
+ jnz x_loop_2
+
+ add esi,[line]
+ add edi,[ext]
+
+ pop ecx
+ dec ecx
+ jnz y_loop
+
+end_y_loop:
+ pop edi
+ pop esi
+ pop ebx
+ mov esp, ebp
+ pop ebp
+ ret
+ }
+}
+
+extern "C" void __declspec(naked) asmLoad8bI (wxUIntPtr src, int dst, wxUIntPtr wid_64, int height, int line, int ext)
+{
+ _asm {
+ push ebp
+ mov ebp, esp
+ push ebx
+ push esi
+ push edi
+
+ mov esi,[src]
+ mov edi,[dst]
+ mov ecx,[height]
+y_loop:
+ push ecx
+ mov ecx,[wid_64]
+x_loop:
+ mov eax,[esi] // read all 4 pixels
+ add esi,4
+
+ mov [edi],eax // save dword
+ add edi,4
+
+ mov eax,[esi] // read all 4 pixels
+ add esi,4
+
+ mov [edi],eax // save dword
+ add edi,4
+ // *
+
+ dec ecx
+ jnz x_loop
+
+ pop ecx
+ dec ecx
+ jz end_y_loop
+ push ecx
+
+ add esi,[line]
+ add edi,[ext]
+
+ mov ecx,[wid_64]
+x_loop_2:
+ mov eax,[esi+4] // read both pixels
+
+ mov [edi],eax //save dword
+ add edi,4
+
+ mov eax,[esi] // read both pixels
+ add esi,8
+
+ mov [edi],eax //save dword
+ add edi,4
+ // *
+
+ dec ecx
+ jnz x_loop_2
+
+ add esi,[line]
+ add edi,[ext]
+
+ pop ecx
+ dec ecx
+ jnz y_loop
+
+end_y_loop:
+ pop edi
+ pop esi
+ pop ebx
+ mov esp, ebp
+ pop ebp
+ ret
+ }
+}
//****************************************************************
// Size: 1, Format: 2
diff --git a/Source/Glide64/Texture.asm.cpp b/Source/Glide64/Texture.asm.cpp
deleted file mode 100644
index fbfdabea1..000000000
--- a/Source/Glide64/Texture.asm.cpp
+++ /dev/null
@@ -1,3857 +0,0 @@
-/*
-* Glide64 - Glide video plugin for Nintendo 64 emulators.
-*
-* This program is free software; you can redistribute it and/or modify
-* it under the terms of the GNU General Public License as published by
-* the Free Software Foundation; either version 2 of the License, or
-* any later version.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU General Public License for more details.
-*
-* You should have received a copy of the GNU General Public License
-* along with this program; if not, write to the Free Software
-* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-*/
-
-/****************************************************************
-
- Glide64 - Glide Plugin for Nintendo 64 emulators
- Project started on December 29th, 2001
-
- Authors:
- Dave2001, original author, founded the project in 2001, left it in 2002
- Gugaman, joined the project in 2002, left it in 2002
- Sergey 'Gonetz' Lipski, joined the project in 2002, main author since fall of 2002
- Hiroshi 'KoolSmoky' Morii, joined the project in 2007
-
-****************************************************************
-
- To modify Glide64:
- * Write your name and (optional)email, commented by your work, so I know who did it, and so that you can find which parts you modified when it comes time to send it to me.
- * Do NOT send me the whole project or file that you modified. Take out your modified code sections, and tell me where to put them. If people sent the whole thing, I would have many different versions, but no idea how to combine them all.
-
-****************************************************************
-*/
-
-#include "Gfx_1.3.h"
-
-/****************************************************************
-
- ******** Textures load ********
-
-****************************************************************/
-
-
-/*****************************************************************
-4b textures load
-*****************************************************************/
-
-
-/****************************************************************
- Size: 0, Format: 2
- 2009 ported to NASM - Sergey (Gonetz) Lipski
- *****************************************************************/
-extern "C" void __declspec(naked) asmLoad4bCI (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext, wxUIntPtr pal)
-{
- _asm {
- push ebp
- mov ebp, esp
- push ebx
- push esi
- push edi
-
- mov ebx,[pal]
- mov esi,[src]
- mov edi,[dst]
- mov ecx,[height]
-y_loop:
- push ecx
- mov ecx,[wid_64]
-x_loop:
- push ecx
-
- mov eax,[esi] // read all 8 pixels
- bswap eax
- add esi,4
- mov edx,eax
-
- // 1st dword output {
- shr eax,23
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,1
- shl ecx,16
-
- mov eax,edx
- shr eax,27
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,1
-
- mov [edi],ecx
- add edi,4
- // }
-
- // 2nd dword output {
- mov eax,edx
- shr eax,15
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,1
- shl ecx,16
-
- mov eax,edx
- shr eax,19
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,1
-
- mov [edi],ecx
- add edi,4
- // }
-
- // 3rd dword output {
- mov eax,edx
- shr eax,7
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,1
- shl ecx,16
-
- mov eax,edx
- shr eax,11
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,1
-
- mov [edi],ecx
- add edi,4
- // }
-
- // 4th dword output {
- mov eax,edx
- shl eax,1
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,1
- shl ecx,16
-
- shr edx,3
- and edx,0x1E
- mov cx,[ebx+edx]
- ror cx,1
-
- mov [edi],ecx
- add edi,4
- // }
-
- // * copy
- mov eax,[esi] // read all 8 pixels
- bswap eax
- add esi,4
- mov edx,eax
-
- // 1st dword output {
- shr eax,23
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,1
- shl ecx,16
-
- mov eax,edx
- shr eax,27
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,1
-
- mov [edi],ecx
- add edi,4
- // }
-
- // 2nd dword output {
- mov eax,edx
- shr eax,15
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,1
- shl ecx,16
-
- mov eax,edx
- shr eax,19
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,1
-
- mov [edi],ecx
- add edi,4
- // }
-
- // 3rd dword output {
- mov eax,edx
- shr eax,7
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,1
- shl ecx,16
-
- mov eax,edx
- shr eax,11
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,1
-
- mov [edi],ecx
- add edi,4
- // }
-
- // 4th dword output {
- mov eax,edx
- shl eax,1
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,1
- shl ecx,16
-
- shr edx,3
- and edx,0x1E
- mov cx,[ebx+edx]
- ror cx,1
-
- mov [edi],ecx
- add edi,4
- // }
- // *
-
- pop ecx
-
- dec ecx
- jnz x_loop
-
- pop ecx
- dec ecx
- jz near end_y_loop
- push ecx
-
- mov eax,esi
- add eax,[line]
- mov esi,[src]
- sub eax,esi
- and eax,0x7FF
- add esi,eax
- add edi,[ext]
-
- mov ecx,[wid_64]
- x_loop_2:
- push ecx
-
- mov eax,[esi+4] // read all 8 pixels
- bswap eax
- mov edx,eax
-
- // 1st dword output {
- shr eax,23
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,1
- shl ecx,16
-
- mov eax,edx
- shr eax,27
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,1
-
- mov [edi],ecx
- add edi,4
- // }
-
- // 2nd dword output {
- mov eax,edx
- shr eax,15
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,1
- shl ecx,16
-
- mov eax,edx
- shr eax,19
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,1
-
- mov [edi],ecx
- add edi,4
- // }
-
- // 3rd dword output {
- mov eax,edx
- shr eax,7
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,1
- shl ecx,16
-
- mov eax,edx
- shr eax,11
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,1
-
- mov [edi],ecx
- add edi,4
- // }
-
- // 4th dword output {
- mov eax,edx
- shl eax,1
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,1
- shl ecx,16
-
- shr edx,3
- and edx,0x1E
- mov cx,[ebx+edx]
- ror cx,1
-
- mov [edi],ecx
- add edi,4
- // }
-
- // * copy
- mov eax,[esi] // read all 8 pixels
- bswap eax
- mov edx,esi
- add edx,8
- mov esi,[src]
- sub edx,esi
- and edx,0x7FF
- add esi,edx
- mov edx,eax
-
- // 1st dword output {
- shr eax,23
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,1
- shl ecx,16
-
- mov eax,edx
- shr eax,27
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,1
-
- mov [edi],ecx
- add edi,4
- // }
-
- // 2nd dword output {
- mov eax,edx
- shr eax,15
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,1
- shl ecx,16
-
- mov eax,edx
- shr eax,19
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,1
-
- mov [edi],ecx
- add edi,4
- // }
-
- // 3rd dword output {
- mov eax,edx
- shr eax,7
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,1
- shl ecx,16
-
- mov eax,edx
- shr eax,11
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,1
-
- mov [edi],ecx
- add edi,4
- // }
-
- // 4th dword output {
- mov eax,edx
- shl eax,1
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,1
- shl ecx,16
-
- shr edx,3
- and edx,0x1E
- mov cx,[ebx+edx]
- ror cx,1
-
- mov [edi],ecx
- add edi,4
- // }
- // *
-
- pop ecx
-
- dec ecx
- jnz x_loop_2
-
- mov eax,esi
- add eax,[line]
- mov esi,[src]
- sub eax,esi
- and eax,0x7FF
- add esi,eax
- add edi,[ext]
-
- pop ecx
- dec ecx
- jnz y_loop
-
-end_y_loop:
- pop edi
- pop esi
- pop ebx
- mov esp, ebp
- pop ebp
- ret
- }
-}
-
-extern "C" void __declspec(naked) asmLoad4bIAPal (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext, wxUIntPtr pal)
-{
- _asm {
- push ebp
- mov ebp, esp
- push ebx
- push esi
- push edi
-
- mov ebx,[pal]
- mov esi,[src]
- mov edi,[dst]
- mov ecx,[height]
-y_loop:
- push ecx
- mov ecx,[wid_64]
-x_loop:
- push ecx
-
- mov eax,[esi] // read all 8 pixels
- bswap eax
- add esi,4
- mov edx,eax
-
- // 1st dword output {
- shr eax,23
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,8
- shl ecx,16
-
- mov eax,edx
- shr eax,27
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,8
-
- mov [edi],ecx
- add edi,4
- // }
-
- // 2nd dword output {
- mov eax,edx
- shr eax,15
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,8
- shl ecx,16
-
- mov eax,edx
- shr eax,19
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,8
-
- mov [edi],ecx
- add edi,4
- // }
-
- // 3rd dword output {
- mov eax,edx
- shr eax,7
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,8
- shl ecx,16
-
- mov eax,edx
- shr eax,11
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,8
-
- mov [edi],ecx
- add edi,4
- // }
-
- // 4th dword output {
- mov eax,edx
- shl eax,1
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,8
- shl ecx,16
-
- shr edx,3
- and edx,0x1E
- mov cx,[ebx+edx]
- ror cx,8
-
- mov [edi],ecx
- add edi,4
- // }
-
- // * copy
- mov eax,[esi] // read all 8 pixels
- bswap eax
- add esi,4
- mov edx,eax
-
- // 1st dword output {
- shr eax,23
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,8
- shl ecx,16
-
- mov eax,edx
- shr eax,27
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,8
-
- mov [edi],ecx
- add edi,4
- // }
-
- // 2nd dword output {
- mov eax,edx
- shr eax,15
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,8
- shl ecx,16
-
- mov eax,edx
- shr eax,19
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,8
-
- mov [edi],ecx
- add edi,4
- // }
-
- // 3rd dword output {
- mov eax,edx
- shr eax,7
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,8
- shl ecx,16
-
- mov eax,edx
- shr eax,11
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,8
-
- mov [edi],ecx
- add edi,4
- // }
-
- // 4th dword output {
- mov eax,edx
- shl eax,1
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,8
- shl ecx,16
-
- shr edx,3
- and edx,0x1E
- mov cx,[ebx+edx]
- ror cx,8
-
- mov [edi],ecx
- add edi,4
- // }
- // *
-
- pop ecx
-
- dec ecx
- jnz x_loop
-
- pop ecx
- dec ecx
- jz near end_y_loop
- push ecx
-
- mov eax,esi
- add eax,[line]
- mov esi,[src]
- sub eax,esi
- and eax,0x7FF
- add esi,eax
- add edi,[ext]
-
- mov ecx,[wid_64]
-x_loop_2:
- push ecx
-
- mov eax,[esi+4] // read all 8 pixels
- bswap eax
- mov edx,eax
-
- // 1st dword output {
- shr eax,23
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,8
- shl ecx,16
-
- mov eax,edx
- shr eax,27
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,8
-
- mov [edi],ecx
- add edi,4
- // }
-
- // 2nd dword output {
- mov eax,edx
- shr eax,15
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,8
- shl ecx,16
-
- mov eax,edx
- shr eax,19
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,8
-
- mov [edi],ecx
- add edi,4
- // }
-
- // 3rd dword output {
- mov eax,edx
- shr eax,7
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,8
- shl ecx,16
-
- mov eax,edx
- shr eax,11
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,8
-
- mov [edi],ecx
- add edi,4
- // }
-
- // 4th dword output {
- mov eax,edx
- shl eax,1
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,8
- shl ecx,16
-
- shr edx,3
- and edx,0x1E
- mov cx,[ebx+edx]
- ror cx,8
-
- mov [edi],ecx
- add edi,4
- // }
-
- // * copy
- mov eax,[esi] // read all 8 pixels
- bswap eax
- mov edx,esi
- add edx,8
- mov esi,[src]
- sub edx,esi
- and edx,0x7FF
- add esi,edx
- mov edx,eax
-
- // 1st dword output {
- shr eax,23
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,8
- shl ecx,16
-
- mov eax,edx
- shr eax,27
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,8
-
- mov [edi],ecx
- add edi,4
- // }
-
- // 2nd dword output {
- mov eax,edx
- shr eax,15
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,8
- shl ecx,16
-
- mov eax,edx
- shr eax,19
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,8
-
- mov [edi],ecx
- add edi,4
- // }
-
- // 3rd dword output {
- mov eax,edx
- shr eax,7
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,8
- shl ecx,16
-
- mov eax,edx
- shr eax,11
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,8
-
- mov [edi],ecx
- add edi,4
- // }
-
- // 4th dword output {
- mov eax,edx
- shl eax,1
- and eax,0x1E
- mov cx,[ebx+eax]
- ror cx,8
- shl ecx,16
-
- shr edx,3
- and edx,0x1E
- mov cx,[ebx+edx]
- ror cx,8
-
- mov [edi],ecx
- add edi,4
- // }
- // *
-
- pop ecx
-
- dec ecx
- jnz x_loop_2
-
- mov eax,esi
- add eax,[line]
- mov esi,[src]
- sub eax,esi
- and eax,0x7FF
- add esi,eax
- add edi,[ext]
-
- pop ecx
- dec ecx
- jnz y_loop
-
-end_y_loop:
- pop edi
- pop esi
- pop ebx
- mov esp, ebp
- pop ebp
- ret
- }
-}
-
-/*****************************************************************
- Size: 0, Format: 3
-
- ** BY GUGAMAN **
- 2009 ported to NASM - Sergey (Gonetz) Lipski
-*****************************************************************/
-extern "C" void __declspec(naked) asmLoad4bIA (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext)
-{
- _asm {
- push ebp
- mov ebp, esp
- push ebx
- push esi
- push edi
-
- mov esi,[src]
- mov edi,[dst]
- mov ecx,[height]
-y_loop:
- push ecx
- mov ecx,[wid_64]
-x_loop:
- push ecx
-
- mov eax,[esi] // read all 8 pixels
- bswap eax
- add esi,4
- mov edx,eax
-
- // 1st dword {
- xor ecx,ecx
-
- // pixel #1
- // IIIAxxxxxxxxxxxxxxxxxxxxxxxxxxxx
- // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
- mov eax,edx
- shr eax,24 //Alpha
- and eax,0x00000010
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- mov eax,edx
- shr eax,28 // Intensity
- and eax,0x0000000E
- or ecx,eax
- shr eax,3
- or ecx,eax
-
- // pixel #2
- // xxxxIIIAxxxxxxxxxxxxxxxxxxxxxxxx
- // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
- mov eax,edx
- shr eax,12 //Alpha
- and eax,0x00001000
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- mov eax,edx
- shr eax,16 // Intensity
- and eax,0x00000E00
- or ecx,eax
- shr eax,3
- and eax,0x00000100
- or ecx,eax
-
- // pixel #3
- // xxxxxxxxIIIAxxxxxxxxxxxxxxxxxxxx
- // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
- //Alpha
- mov eax,edx
- and eax,0x00100000
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- mov eax,edx
- shr eax,4 // Intensity
- and eax,0x000E0000
- or ecx,eax
- shr eax,3
- and eax,0x00010000
- or ecx,eax
-
- // pixel #4
- // xxxxxxxxxxxxIIIAxxxxxxxxxxxxxxxx
- // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
- mov eax,edx
- shl eax,12 //Alpha
- and eax,0x10000000
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- mov eax,edx
- shl eax,8 // Intensity
- and eax,0x0E000000
- or ecx,eax
- shr eax,3
- and eax,0x01000000
- or ecx,eax
-
-
- mov [edi],ecx
- add edi,4
- // }
-
-// 2nd dword {
- xor ecx,ecx
-
- // pixel #5
- // xxxxxxxxxxxxxxxxIIIAxxxxxxxxxxxx
- // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
- mov eax,edx
- shr eax,8 //Alpha
- and eax,0x00000010
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- mov eax,edx
- shr eax,12 // Intensity
- and eax,0x0000000E
- or ecx,eax
- shr eax,3
- or ecx,eax
-
- // pixel #6
- // xxxxxxxxxxxxxxxxxxxxIIIAxxxxxxxx
- // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
- //Alpha
- mov eax,edx
- shl eax,4
- and eax,0x00001000
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- mov eax,edx // Intensity
- and eax,0x00000E00
- or ecx,eax
- shr eax,3
- and eax,0x00000100
- or ecx,eax
-
- // pixel #7
- // xxxxxxxxxxxxxxxxxxxxxxxxIIIAxxxx
- // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
- //Alpha
- mov eax,edx
- shl eax,16
- and eax,0x00100000
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- mov eax,edx
- shl eax,12 // Intensity
- and eax,0x000E0000
- or ecx,eax
- shr eax,3
- and eax,0x00010000
- or ecx,eax
-
- // pixel #8
- // xxxxxxxxxxxxxxxxxxxxxxxxxxxxIIIA
- // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
- mov eax,edx
- shl eax,28 //Alpha
- and eax,0x10000000
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- mov eax,edx
- shl eax,24 // Intensity
- and eax,0x0E000000
- or ecx,eax
- shr eax,3
- and eax,0x01000000
- or ecx,eax
-
- mov [edi],ecx
- add edi,4
- // }
-
- // * copy
- mov eax,[esi] // read all 8 pixels
- bswap eax
- add esi,4
- mov edx,eax
-
- // 1st dword {
- xor ecx,ecx
-
- // pixel #1
- // IIIAxxxxxxxxxxxxxxxxxxxxxxxxxxxx
- // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
- mov eax,edx
- shr eax,24 //Alpha
- and eax,0x00000010
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- mov eax,edx
- shr eax,28 // Intensity
- and eax,0x0000000E
- or ecx,eax
- shr eax,3
- or ecx,eax
-
- // pixel #2
- // xxxxIIIAxxxxxxxxxxxxxxxxxxxxxxxx
- // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
- mov eax,edx
- shr eax,12 //Alpha
- and eax,0x00001000
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- mov eax,edx
- shr eax,16 // Intensity
- and eax,0x00000E00
- or ecx,eax
- shr eax,3
- and eax,0x00000100
- or ecx,eax
-
- // pixel #3
- // xxxxxxxxIIIAxxxxxxxxxxxxxxxxxxxx
- // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
- //Alpha
- mov eax,edx
- and eax,0x00100000
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- mov eax,edx
- shr eax,4 // Intensity
- and eax,0x000E0000
- or ecx,eax
- shr eax,3
- and eax,0x00010000
- or ecx,eax
-
- // pixel #4
- // xxxxxxxxxxxxIIIAxxxxxxxxxxxxxxxx
- // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
- mov eax,edx
- shl eax,12 //Alpha
- and eax,0x10000000
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- mov eax,edx
- shl eax,8 // Intensity
- and eax,0x0E000000
- or ecx,eax
- shr eax,3
- and eax,0x01000000
- or ecx,eax
-
-
- mov [edi],ecx
- add edi,4
- // }
-
-// 2nd dword {
- xor ecx,ecx
-
- // pixel #5
- // xxxxxxxxxxxxxxxxIIIAxxxxxxxxxxxx
- // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
- mov eax,edx
- shr eax,8 //Alpha
- and eax,0x00000010
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- mov eax,edx
- shr eax,12 // Intensity
- and eax,0x0000000E
- or ecx,eax
- shr eax,3
- or ecx,eax
-
- // pixel #6
- // xxxxxxxxxxxxxxxxxxxxIIIAxxxxxxxx
- // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
- //Alpha
- mov eax,edx
- shl eax,4
- and eax,0x00001000
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- mov eax,edx // Intensity
- and eax,0x00000E00
- or ecx,eax
- shr eax,3
- and eax,0x00000100
- or ecx,eax
-
- // pixel #7
- // xxxxxxxxxxxxxxxxxxxxxxxxIIIAxxxx
- // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
- //Alpha
- mov eax,edx
- shl eax,16
- and eax,0x00100000
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- mov eax,edx
- shl eax,12 // Intensity
- and eax,0x000E0000
- or ecx,eax
- shr eax,3
- and eax,0x00010000
- or ecx,eax
-
- // pixel #8
- // xxxxxxxxxxxxxxxxxxxxxxxxxxxxIIIA
- // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
- mov eax,edx
- shl eax,28 //Alpha
- and eax,0x10000000
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- mov eax,edx
- shl eax,24 // Intensity
- and eax,0x0E000000
- or ecx,eax
- shr eax,3
- and eax,0x01000000
- or ecx,eax
-
- mov [edi],ecx
- add edi,4
- // }
-
- // *
-
- pop ecx
- dec ecx
- jnz x_loop
-
- pop ecx
- dec ecx
- jz near end_y_loop
- push ecx
-
- add esi,[line]
- add edi,[ext]
-
- mov ecx,[wid_64]
-x_loop_2:
- push ecx
-
- mov eax,[esi+4] // read all 8 pixels
- bswap eax
- mov edx,eax
-
- // 1st dword {
- xor ecx,ecx
-
- // pixel #1
- // IIIAxxxxxxxxxxxxxxxxxxxxxxxxxxxx
- // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
- mov eax,edx
- shr eax,24 //Alpha
- and eax,0x00000010
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- mov eax,edx
- shr eax,28 // Intensity
- and eax,0x0000000E
- or ecx,eax
- shr eax,3
- or ecx,eax
-
- // pixel #2
- // xxxxIIIAxxxxxxxxxxxxxxxxxxxxxxxx
- // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
- mov eax,edx
- shr eax,12 //Alpha
- and eax,0x00001000
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- mov eax,edx
- shr eax,16 // Intensity
- and eax,0x00000E00
- or ecx,eax
- shr eax,3
- and eax,0x00000100
- or ecx,eax
-
- // pixel #3
- // xxxxxxxxIIIAxxxxxxxxxxxxxxxxxxxx
- // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
- //Alpha
- mov eax,edx
- and eax,0x00100000
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- mov eax,edx
- shr eax,4 // Intensity
- and eax,0x000E0000
- or ecx,eax
- shr eax,3
- and eax,0x00010000
- or ecx,eax
-
- // pixel #4
- // xxxxxxxxxxxxIIIAxxxxxxxxxxxxxxxx
- // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
- mov eax,edx
- shl eax,12 //Alpha
- and eax,0x10000000
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- mov eax,edx
- shl eax,8 // Intensity
- and eax,0x0E000000
- or ecx,eax
- shr eax,3
- and eax,0x01000000
- or ecx,eax
-
-
- mov [edi],ecx
- add edi,4
- // }
-
-// 2nd dword {
- xor ecx,ecx
-
- // pixel #5
- // xxxxxxxxxxxxxxxxIIIAxxxxxxxxxxxx
- // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
- mov eax,edx
- shr eax,8 //Alpha
- and eax,0x00000010
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- mov eax,edx
- shr eax,12 // Intensity
- and eax,0x0000000E
- or ecx,eax
- shr eax,3
- or ecx,eax
-
- // pixel #6
- // xxxxxxxxxxxxxxxxxxxxIIIAxxxxxxxx
- // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
- //Alpha
- mov eax,edx
- shl eax,4
- and eax,0x00001000
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- mov eax,edx // Intensity
- and eax,0x00000E00
- or ecx,eax
- shr eax,3
- and eax,0x00000100
- or ecx,eax
-
- // pixel #7
- // xxxxxxxxxxxxxxxxxxxxxxxxIIIAxxxx
- // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
- //Alpha
- mov eax,edx
- shl eax,16
- and eax,0x00100000
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- mov eax,edx
- shl eax,12 // Intensity
- and eax,0x000E0000
- or ecx,eax
- shr eax,3
- and eax,0x00010000
- or ecx,eax
-
- // pixel #8
- // xxxxxxxxxxxxxxxxxxxxxxxxxxxxIIIA
- // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
- mov eax,edx
- shl eax,28 //Alpha
- and eax,0x10000000
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- mov eax,edx
- shl eax,24 // Intensity
- and eax,0x0E000000
- or ecx,eax
- shr eax,3
- and eax,0x01000000
- or ecx,eax
-
- mov [edi],ecx
- add edi,4
- // }
-
- // * copy
- mov eax,[esi] // read all 8 pixels
- bswap eax
- add esi,8
- mov edx,eax
-
-// 1st dword {
- xor ecx,ecx
-
- // pixel #1
- // IIIAxxxxxxxxxxxxxxxxxxxxxxxxxxxx
- // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
- mov eax,edx
- shr eax,24 //Alpha
- and eax,0x00000010
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- mov eax,edx
- shr eax,28 // Intensity
- and eax,0x0000000E
- or ecx,eax
- shr eax,3
- or ecx,eax
-
- // pixel #2
- // xxxxIIIAxxxxxxxxxxxxxxxxxxxxxxxx
- // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
- mov eax,edx
- shr eax,12 //Alpha
- and eax,0x00001000
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- mov eax,edx
- shr eax,16 // Intensity
- and eax,0x00000E00
- or ecx,eax
- shr eax,3
- and eax,0x00000100
- or ecx,eax
-
- // pixel #3
- // xxxxxxxxIIIAxxxxxxxxxxxxxxxxxxxx
- // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
- //Alpha
- mov eax,edx
- and eax,0x00100000
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- mov eax,edx
- shr eax,4 // Intensity
- and eax,0x000E0000
- or ecx,eax
- shr eax,3
- and eax,0x00010000
- or ecx,eax
-
- // pixel #4
- // xxxxxxxxxxxxIIIAxxxxxxxxxxxxxxxx
- // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
- mov eax,edx
- shl eax,12 //Alpha
- and eax,0x10000000
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- mov eax,edx
- shl eax,8 // Intensity
- and eax,0x0E000000
- or ecx,eax
- shr eax,3
- and eax,0x01000000
- or ecx,eax
-
-
- mov [edi],ecx
- add edi,4
- // }
-
-// 2nd dword {
- xor ecx,ecx
-
- // pixel #5
- // xxxxxxxxxxxxxxxxIIIAxxxxxxxxxxxx
- // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
- mov eax,edx
- shr eax,8 //Alpha
- and eax,0x00000010
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- mov eax,edx
- shr eax,12 // Intensity
- and eax,0x0000000E
- or ecx,eax
- shr eax,3
- or ecx,eax
-
- // pixel #6
- // xxxxxxxxxxxxxxxxxxxxIIIAxxxxxxxx
- // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
- //Alpha
- mov eax,edx
- shl eax,4
- and eax,0x00001000
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- mov eax,edx // Intensity
- and eax,0x00000E00
- or ecx,eax
- shr eax,3
- and eax,0x00000100
- or ecx,eax
-
- // pixel #7
- // xxxxxxxxxxxxxxxxxxxxxxxxIIIAxxxx
- // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
- //Alpha
- mov eax,edx
- shl eax,16
- and eax,0x00100000
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- mov eax,edx
- shl eax,12 // Intensity
- and eax,0x000E0000
- or ecx,eax
- shr eax,3
- and eax,0x00010000
- or ecx,eax
-
- // pixel #8
- // xxxxxxxxxxxxxxxxxxxxxxxxxxxxIIIA
- // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
- mov eax,edx
- shl eax,28 //Alpha
- and eax,0x10000000
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- shl eax,1
- or ecx,eax
- mov eax,edx
- shl eax,24 // Intensity
- and eax,0x0E000000
- or ecx,eax
- shr eax,3
- and eax,0x01000000
- or ecx,eax
-
- mov [edi],ecx
- add edi,4
- // }
- // *
-
- pop ecx
- dec ecx
- jnz x_loop_2
-
- add esi,[line]
- add edi,[ext]
-
- pop ecx
- dec ecx
- jnz y_loop
-
-end_y_loop:
- pop edi
- pop esi
- pop ebx
- mov esp, ebp
- pop ebp
- ret
- }
-}
-
-//****************************************************************
-// Size: 0, Format: 4
-// 2009 ported to NASM - Sergey (Gonetz) Lipski
-
-extern "C" void __declspec(naked) asmLoad4bI (wxUIntPtr src, int dst, wxUIntPtr wid_64, int height, int line, int ext)
-{
- _asm {
- push ebp
- mov ebp, esp
- push ebx
- push esi
- push edi
-
- mov esi,[src]
- mov edi,[dst]
- mov ecx,[height]
-y_loop:
- push ecx
- mov ecx,[wid_64]
-x_loop:
- push ecx
-
- mov eax,[esi] // read all 8 pixels
- bswap eax
- add esi,4
- mov edx,eax
-
- // 1st dword {
- xor ecx,ecx
- shr eax,28 // 0xF0000000 -> 0x0000000F
- or ecx,eax
- shl eax,4
- or ecx,eax
-
- mov eax,edx // 0x0F000000 -> 0x00000F00
- shr eax,16
- and eax,0x00000F00
- or ecx,eax
- shl eax,4
- or ecx,eax
-
- mov eax,edx
- shr eax,4 // 0x00F00000 -> 0x000F0000
- and eax,0x000F0000
- or ecx,eax
- shl eax,4
- or ecx,eax
-
- mov eax,edx
- shl eax,8 // 0x000F0000 -> 0x0F000000
- and eax,0x0F000000
- or ecx,eax
- shl eax,4
- or ecx,eax
-
- mov [edi],ecx
- add edi,4
- // }
-
- // 2nd dword {
- xor ecx,ecx
- mov eax,edx
- shr eax,12 // 0x0000F000 -> 0x0000000F
- and eax,0x0000000F
- or ecx,eax
- shl eax,4
- or ecx,eax
-
- mov eax,edx // 0x00000F00 -> 0x00000F00
- and eax,0x00000F00
- or ecx,eax
- shl eax,4
- or ecx,eax
-
- mov eax,edx
- shl eax,12 // 0x000000F0 -> 0x000F0000
- and eax,0x000F0000
- or ecx,eax
- shl eax,4
- or ecx,eax
-
- shl edx,24 // 0x0000000F -> 0x0F000000
- and edx,0x0F000000
- or ecx,edx
- shl edx,4
- or ecx,edx
-
- mov [edi],ecx
- add edi,4
- // }
-
- // * copy
- mov eax,[esi] // read all 8 pixels
- bswap eax
- add esi,4
- mov edx,eax
-
- // 1st dword {
- xor ecx,ecx
- shr eax,28 // 0xF0000000 -> 0x0000000F
- or ecx,eax
- shl eax,4
- or ecx,eax
-
- mov eax,edx // 0x0F000000 -> 0x00000F00
- shr eax,16
- and eax,0x00000F00
- or ecx,eax
- shl eax,4
- or ecx,eax
-
- mov eax,edx
- shr eax,4 // 0x00F00000 -> 0x000F0000
- and eax,0x000F0000
- or ecx,eax
- shl eax,4
- or ecx,eax
-
- mov eax,edx
- shl eax,8 // 0x000F0000 -> 0x0F000000
- and eax,0x0F000000
- or ecx,eax
- shl eax,4
- or ecx,eax
-
- mov [edi],ecx
- add edi,4
- // }
-
- // 2nd dword {
- xor ecx,ecx
- mov eax,edx
- shr eax,12 // 0x0000F000 -> 0x0000000F
- and eax,0x0000000F
- or ecx,eax
- shl eax,4
- or ecx,eax
-
- mov eax,edx // 0x00000F00 -> 0x00000F00
- and eax,0x00000F00
- or ecx,eax
- shl eax,4
- or ecx,eax
-
- mov eax,edx
- shl eax,12 // 0x000000F0 -> 0x000F0000
- and eax,0x000F0000
- or ecx,eax
- shl eax,4
- or ecx,eax
-
- shl edx,24 // 0x0000000F -> 0x0F000000
- and edx,0x0F000000
- or ecx,edx
- shl edx,4
- or ecx,edx
-
- mov [edi],ecx
- add edi,4
- // }
- // *
-
- pop ecx
- dec ecx
- jnz x_loop
-
- pop ecx
- dec ecx
- jz near end_y_loop
- push ecx
-
- add esi,[line]
- add edi,[ext]
-
- mov ecx,[wid_64]
-x_loop_2:
- push ecx
-
- mov eax,[esi+4] // read all 8 pixels
- bswap eax
- mov edx,eax
-
- // 1st dword {
- xor ecx,ecx
- shr eax,28 // 0xF0000000 -> 0x0000000F
- or ecx,eax
- shl eax,4
- or ecx,eax
-
- mov eax,edx // 0x0F000000 -> 0x00000F00
- shr eax,16
- and eax,0x00000F00
- or ecx,eax
- shl eax,4
- or ecx,eax
-
- mov eax,edx
- shr eax,4 // 0x00F00000 -> 0x000F0000
- and eax,0x000F0000
- or ecx,eax
- shl eax,4
- or ecx,eax
-
- mov eax,edx
- shl eax,8 // 0x000F0000 -> 0x0F000000
- and eax,0x0F000000
- or ecx,eax
- shl eax,4
- or ecx,eax
-
- mov [edi],ecx
- add edi,4
- // }
-
- // 2nd dword {
- xor ecx,ecx
- mov eax,edx
- shr eax,12 // 0x0000F000 -> 0x0000000F
- and eax,0x0000000F
- or ecx,eax
- shl eax,4
- or ecx,eax
-
- mov eax,edx // 0x00000F00 -> 0x00000F00
- and eax,0x00000F00
- or ecx,eax
- shl eax,4
- or ecx,eax
-
- mov eax,edx
- shl eax,12 // 0x000000F0 -> 0x000F0000
- and eax,0x000F0000
- or ecx,eax
- shl eax,4
- or ecx,eax
-
- shl edx,24 // 0x0000000F -> 0x0F000000
- and edx,0x0F000000
- or ecx,edx
- shl edx,4
- or ecx,edx
-
- mov [edi],ecx
- add edi,4
- // }
-
- // * copy
- mov eax,[esi] // read all 8 pixels
- bswap eax
- add esi,8
- mov edx,eax
-
- // 1st dword {
- xor ecx,ecx
- shr eax,28 // 0xF0000000 -> 0x0000000F
- or ecx,eax
- shl eax,4
- or ecx,eax
-
- mov eax,edx // 0x0F000000 -> 0x00000F00
- shr eax,16
- and eax,0x00000F00
- or ecx,eax
- shl eax,4
- or ecx,eax
-
- mov eax,edx
- shr eax,4 // 0x00F00000 -> 0x000F0000
- and eax,0x000F0000
- or ecx,eax
- shl eax,4
- or ecx,eax
-
- mov eax,edx
- shl eax,8 // 0x000F0000 -> 0x0F000000
- and eax,0x0F000000
- or ecx,eax
- shl eax,4
- or ecx,eax
-
- mov [edi],ecx
- add edi,4
- // }
-
- // 2nd dword {
- xor ecx,ecx
- mov eax,edx
- shr eax,12 // 0x0000F000 -> 0x0000000F
- and eax,0x0000000F
- or ecx,eax
- shl eax,4
- or ecx,eax
-
- mov eax,edx // 0x00000F00 -> 0x00000F00
- and eax,0x00000F00
- or ecx,eax
- shl eax,4
- or ecx,eax
-
- mov eax,edx
- shl eax,12 // 0x000000F0 -> 0x000F0000
- and eax,0x000F0000
- or ecx,eax
- shl eax,4
- or ecx,eax
-
- shl edx,24 // 0x0000000F -> 0x0F000000
- and edx,0x0F000000
- or ecx,edx
- shl edx,4
- or ecx,edx
-
- mov [edi],ecx
- add edi,4
- // }
- // *
-
- pop ecx
- dec ecx
- jnz x_loop_2
-
- add esi,[line]
- add edi,[ext]
-
- pop ecx
- dec ecx
- jnz y_loop
-
-end_y_loop:
- pop edi
- pop esi
- pop ebx
- mov esp, ebp
- pop ebp
- ret
- }
-}
-//****************************************************************
-//8b textures load
-//****************************************************************
-
-//****************************************************************
-// Size: 1, Format: 2
-//
-// 2008.03.29 cleaned up - H.Morii
-// 2009 ported to NASM - Sergey (Gonetz) Lipski
-
-extern "C" void __declspec(naked) asmLoad8bCI (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext, wxUIntPtr pal)
-{
- _asm {
- push ebp
- mov ebp, esp
- push ebx
- push esi
- push edi
-
- mov ebx,[pal]
- mov esi,[src]
- mov edi,[dst]
- mov ecx,[height]
-y_loop:
- push ecx
- mov ecx,[wid_64]
-x_loop:
- push ecx
-
- mov eax,[esi] // read all 4 pixels
- bswap eax
- add esi,4
- mov edx,eax
-
- // 1st dword output {
- shr eax,15
- and eax,0x1FE
- mov cx,[ebx+eax]
- ror cx,1
- shl ecx,16
-
- mov eax,edx
- shr eax,23
- and eax,0x1FE
- mov cx,[ebx+eax]
- ror cx,1
-
- mov [edi],ecx
- add edi,4
- // }
-
- // 2nd dword output {
- mov eax,edx
- shl eax,1
- and eax,0x1FE
- mov cx,[ebx+eax]
- ror cx,1
- shl ecx,16
-
- shr edx,7
- and edx,0x1FE
- mov cx,[ebx+edx]
- ror cx,1
-
- mov [edi],ecx
- add edi,4
- // }
-
- // * copy
- mov eax,[esi] // read all 4 pixels
- bswap eax
- add esi,4
- mov edx,eax
-
- // 1st dword output {
- shr eax,15
- and eax,0x1FE
- mov cx,[ebx+eax]
- ror cx,1
- shl ecx,16
-
- mov eax,edx
- shr eax,23
- and eax,0x1FE
- mov cx,[ebx+eax]
- ror cx,1
-
- mov [edi],ecx
- add edi,4
- // }
-
- // 2nd dword output {
- mov eax,edx
- shl eax,1
- and eax,0x1FE
- mov cx,[ebx+eax]
- ror cx,1
- shl ecx,16
-
- shr edx,7
- and edx,0x1FE
- mov cx,[ebx+edx]
- ror cx,1
-
- mov [edi],ecx
- add edi,4
- // }
- // *
-
- pop ecx
-
- dec ecx
- jnz x_loop
-
- pop ecx
- dec ecx
- jz near end_y_loop
- push ecx
-
- mov eax,esi
- add eax,[line]
- mov esi,[src]
- sub eax,esi
- and eax,0x7FF
- add esi,eax
- add edi,[ext]
-
- mov ecx,[wid_64]
-x_loop_2:
- push ecx
-
- mov eax,[esi+4] // read all 4 pixels
- bswap eax
- mov edx,eax
-
- // 1st dword output {
- shr eax,15
- and eax,0x1FE
- mov cx,[ebx+eax]
- ror cx,1
- shl ecx,16
-
- mov eax,edx
- shr eax,23
- and eax,0x1FE
- mov cx,[ebx+eax]
- ror cx,1
-
- mov [edi],ecx
- add edi,4
- // }
-
- // 2nd dword output {
- mov eax,edx
- shl eax,1
- and eax,0x1FE
- mov cx,[ebx+eax]
- ror cx,1
- shl ecx,16
-
- shr edx,7
- and edx,0x1FE
- mov cx,[ebx+edx]
- ror cx,1
-
- mov [edi],ecx
- add edi,4
- // }
-
- // * copy
- mov eax,[esi] // read all 4 pixels
- bswap eax
- mov edx,esi
- add edx,8
- mov esi,[src]
- sub edx,esi
- and edx,0x7FF
- add esi,edx
- mov edx,eax
-
- // 1st dword output {
- shr eax,15
- and eax,0x1FE
- mov cx,[ebx+eax]
- ror cx,1
- shl ecx,16
-
- mov eax,edx
- shr eax,23
- and eax,0x1FE
- mov cx,[ebx+eax]
- ror cx,1
-
- mov [edi],ecx
- add edi,4
- // }
-
- // 2nd dword output {
- mov eax,edx
- shl eax,1
- and eax,0x1FE
- mov cx,[ebx+eax]
- ror cx,1
- shl ecx,16
-
- shr edx,7
- and edx,0x1FE
- mov cx,[ebx+edx]
- ror cx,1
-
- mov [edi],ecx
- add edi,4
- // }
- // *
-
- pop ecx
-
- dec ecx
- jnz x_loop_2
-
- mov eax,esi
- add eax,[line]
- mov esi,[src]
- sub eax,esi
- and eax,0x7FF
- add esi,eax
- add edi,[ext]
-
- pop ecx
- dec ecx
- jnz y_loop
-
-end_y_loop:
- pop edi
- pop esi
- pop ebx
- mov esp, ebp
- pop ebp
- ret
- }
-}
-
-extern "C" void __declspec(naked) asmLoad8bIA8 (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext, wxUIntPtr pal)
-{
- _asm {
- push ebp
- mov ebp, esp
- push ebx
- push esi
- push edi
-
- mov ebx,[pal]
- mov esi,[src]
- mov edi,[dst]
- mov ecx,[height]
-y_loop:
- push ecx
- mov ecx,[wid_64]
-x_loop:
- push ecx
-
- mov eax,[esi] // read all 4 pixels
- bswap eax
- add esi,4
- mov edx,eax
-
- // 1st dword output {
- shr eax,15
- and eax,0x1FE
- mov cx,[ebx+eax]
- ror cx,8
- shl ecx,16
-
- mov eax,edx
- shr eax,23
- and eax,0x1FE
- mov cx,[ebx+eax]
- ror cx,8
-
- mov [edi],ecx
- add edi,4
- // }
-
- // 2nd dword output {
- mov eax,edx
- shl eax,1
- and eax,0x1FE
- mov cx,[ebx+eax]
- ror cx,8
- shl ecx,16
-
- shr edx,7
- and edx,0x1FE
- mov cx,[ebx+edx]
- ror cx,8
-
- mov [edi],ecx
- add edi,4
- // }
-
- // * copy
- mov eax,[esi] // read all 4 pixels
- bswap eax
- add esi,4
- mov edx,eax
-
- // 1st dword output {
- shr eax,15
- and eax,0x1FE
- mov cx,[ebx+eax]
- ror cx,8
- shl ecx,16
-
- mov eax,edx
- shr eax,23
- and eax,0x1FE
- mov cx,[ebx+eax]
- ror cx,8
-
- mov [edi],ecx
- add edi,4
- // }
-
- // 2nd dword output {
- mov eax,edx
- shl eax,1
- and eax,0x1FE
- mov cx,[ebx+eax]
- ror cx,8
- shl ecx,16
-
- shr edx,7
- and edx,0x1FE
- mov cx,[ebx+edx]
- ror cx,8
-
- mov [edi],ecx
- add edi,4
- // }
- // *
-
- pop ecx
-
- dec ecx
- jnz x_loop
-
- pop ecx
- dec ecx
- jz near end_y_loop
- push ecx
-
- add esi,[line]
- add edi,[ext]
-
- mov ecx,[wid_64]
-x_loop_2:
- push ecx
-
- mov eax,[esi+4] // read all 4 pixels
- bswap eax
- mov edx,eax
-
- // 1st dword output {
- shr eax,15
- and eax,0x1FE
- mov cx,[ebx+eax]
- ror cx,8
- shl ecx,16
-
- mov eax,edx
- shr eax,23
- and eax,0x1FE
- mov cx,[ebx+eax]
- ror cx,8
-
- mov [edi],ecx
- add edi,4
- // }
-
- // 2nd dword output {
- mov eax,edx
- shl eax,1
- and eax,0x1FE
- mov cx,[ebx+eax]
- ror cx,8
- shl ecx,16
-
- shr edx,7
- and edx,0x1FE
- mov cx,[ebx+edx]
- ror cx,8
-
- mov [edi],ecx
- add edi,4
- // }
-
- // * copy
- mov eax,[esi] // read all 4 pixels
- bswap eax
- add esi,8
- mov edx,eax
-
- // 1st dword output {
- shr eax,15
- and eax,0x1FE
- mov cx,[ebx+eax]
- ror cx,8
- shl ecx,16
-
- mov eax,edx
- shr eax,23
- and eax,0x1FE
- mov cx,[ebx+eax]
- ror cx,8
-
- mov [edi],ecx
- add edi,4
- // }
-
- // 2nd dword output {
- mov eax,edx
- shl eax,1
- and eax,0x1FE
- mov cx,[ebx+eax]
- ror cx,8
- shl ecx,16
-
- shr edx,7
- and edx,0x1FE
- mov cx,[ebx+edx]
- ror cx,8
-
- mov [edi],ecx
- add edi,4
- // }
- // *
-
- pop ecx
-
- dec ecx
- jnz x_loop_2
-
- add esi,[line]
- add edi,[ext]
-
- pop ecx
- dec ecx
- jnz y_loop
-
-end_y_loop:
- pop edi
- pop esi
- pop ebx
- mov esp, ebp
- pop ebp
- ret
- }
-}
-
-//****************************************************************
-// Size: 1, Format: 3
-//
-// ** by Gugaman **
-//
-// 2008.03.29 cleaned up - H.Morii
-// 2009 ported to NASM - Sergey (Gonetz) Lipski
-
-extern "C" void __declspec(naked) asmLoad8bIA4 (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext)
-{
- _asm {
- push ebp
- mov ebp, esp
- push ebx
- push esi
- push edi
-
- mov esi,[src]
- mov edi,[dst]
- mov ecx,[height]
-y_loop:
- push ecx
- mov ecx,[wid_64]
-x_loop:
- mov eax,[esi] // read all 4 pixels
- mov edx,eax
-
- shr eax,4 //all alpha
- shl edx,4
- and eax,0x0F0F0F0F
- and edx,0xF0F0F0F0
- add esi,4
- or eax,edx
-
- mov [edi],eax // save dword
- add edi,4
-
- mov eax,[esi] // read all 4 pixels
- mov edx,eax
-
- shr eax,4 //all alpha
- shl edx,4
- and eax,0x0F0F0F0F
- and edx,0xF0F0F0F0
- add esi,4
- or eax,edx
-
- mov [edi],eax // save dword
- add edi,4
- // *
-
- dec ecx
- jnz x_loop
-
- pop ecx
- dec ecx
- jz end_y_loop
- push ecx
-
- add esi,[line]
- add edi,[ext]
-
- mov ecx,[wid_64]
-x_loop_2:
- mov eax,[esi+4] // read both pixels
- mov edx,eax
-
- shr eax,4 //all alpha
- shl edx,4
- and eax,0x0F0F0F0F
- and edx,0xF0F0F0F0
- or eax,edx
-
- mov [edi],eax //save dword
- add edi,4
-
- mov eax,[esi] // read both pixels
- add esi,8
- mov edx,eax
-
- shr eax,4 //all alpha
- shl edx,4
- and eax,0x0F0F0F0F
- and edx,0xF0F0F0F0
- or eax,edx
-
- mov [edi],eax //save dword
- add edi,4
- // *
-
- dec ecx
- jnz x_loop_2
-
- add esi,[line]
- add edi,[ext]
-
- pop ecx
- dec ecx
- jnz y_loop
-
-end_y_loop:
- pop edi
- pop esi
- pop ebx
- mov esp, ebp
- pop ebp
- ret
- }
-}
-
-//****************************************************************
-// Size: 1, Format: 4
-//
-// ** by Gugaman **
-// 2009 ported to NASM - Sergey (Gonetz) Lipski
-
-extern "C" void __declspec(naked) asmLoad8bI (wxUIntPtr src, int dst, wxUIntPtr wid_64, int height, int line, int ext)
-{
- _asm {
- push ebp
- mov ebp, esp
- push ebx
- push esi
- push edi
-
- mov esi,[src]
- mov edi,[dst]
- mov ecx,[height]
-y_loop:
- push ecx
- mov ecx,[wid_64]
-x_loop:
- mov eax,[esi] // read all 4 pixels
- add esi,4
-
- mov [edi],eax // save dword
- add edi,4
-
- mov eax,[esi] // read all 4 pixels
- add esi,4
-
- mov [edi],eax // save dword
- add edi,4
- // *
-
- dec ecx
- jnz x_loop
-
- pop ecx
- dec ecx
- jz end_y_loop
- push ecx
-
- add esi,[line]
- add edi,[ext]
-
- mov ecx,[wid_64]
-x_loop_2:
- mov eax,[esi+4] // read both pixels
-
- mov [edi],eax //save dword
- add edi,4
-
- mov eax,[esi] // read both pixels
- add esi,8
-
- mov [edi],eax //save dword
- add edi,4
- // *
-
- dec ecx
- jnz x_loop_2
-
- add esi,[line]
- add edi,[ext]
-
- pop ecx
- dec ecx
- jnz y_loop
-
-end_y_loop:
- pop edi
- pop esi
- pop ebx
- mov esp, ebp
- pop ebp
- ret
- }
-}
-
-
-//****************************************************************
-//16b textures load
-//****************************************************************
-
-//****************************************************************
-// Size: 2, Format: 0
-//
-// 2008.03.29 cleaned up - H.Morii
-// 2009 ported to NASM - Sergey (Gonetz) Lipski
-
-extern "C" void __declspec(naked) asmLoad16bRGBA (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext)
-{
- _asm {
- align 4
- push ebp
- mov ebp,esp
- push ebx
- push esi
- push edi
-
- mov esi,[src]
- mov edi,[dst]
- mov ecx,[height]
-y_loop:
- push ecx
- mov ecx,[wid_64]
-x_loop:
- mov eax,[esi] // read both pixels
- mov ebx,[esi+4] // read both pixels
- bswap eax
- bswap ebx
-
- ror ax,1
- ror bx,1
- ror eax,16
- ror ebx,16
- ror ax,1
- ror bx,1
-
- mov [edi],eax
- mov [edi+4],ebx
- add esi,8
- add edi,8
-
- dec ecx
- jnz x_loop
-
- pop ecx
- dec ecx
- jz end_y_loop
- push ecx
-
- mov eax,esi
- add eax,[line]
- mov esi,[src]
- sub eax, esi
- and eax, 0xFFF
- add esi, eax
- add edi,[ext]
-
- mov ecx,[wid_64]
-x_loop_2:
- mov eax,[esi+4] // read both pixels
- mov ebx,[esi] // read both pixels
- bswap eax
- bswap ebx
-
- ror ax,1
- ror bx,1
- ror eax,16
- ror ebx,16
- ror ax,1
- ror bx,1
-
- mov [edi],eax
- mov [edi+4],ebx
- add esi,8
- add edi,8
-
- dec ecx
- jnz x_loop_2
-
- mov eax,esi
- add eax,[line]
- mov esi,[src]
- sub eax, esi
- and eax, 0xFFF
- add esi, eax
- add edi,[ext]
-
- pop ecx
- dec ecx
- jnz y_loop
-
-end_y_loop:
- pop edi
- pop esi
- pop ebx
- mov esp, ebp
- pop ebp
- ret
- }
-}
-
-
-
-//****************************************************************
-// Size: 2, Format: 3
-//
-// ** by Gugaman/Dave2001 **
-//
-// 2008.03.29 cleaned up - H.Morii
-// 2009 ported to NASM - Sergey (Gonetz) Lipski
-
-extern "C" void __declspec(naked) asmLoad16bIA (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext)
-{
- _asm {
- ALIGN 4
-
- push ebp
- mov ebp, esp
- push ebx
- push esi
- push edi
-
- mov esi,[src]
- mov edi,[dst]
- mov ecx,[height]
-y_loop:
- push ecx
- mov ecx,[wid_64]
-x_loop:
- mov eax,[esi] // read both pixels
- mov ebx,[esi+4] // read both pixels
- mov [edi],eax
- mov [edi+4],ebx
- add esi,8
- add edi,8
-
- dec ecx
- jnz x_loop
-
- pop ecx
- dec ecx
- jz end_y_loop
- push ecx
-
- add esi,[line]
- add edi,[ext]
-
- mov ecx,[wid_64]
-x_loop_2:
- mov eax,[esi+4] // read both pixels
- mov ebx,[esi] // read both pixels
- mov [edi],eax
- mov [edi+4],ebx
- add esi,8
- add edi,8
-
- dec ecx
- jnz x_loop_2
-
- add esi,[line]
- add edi,[ext]
-
- pop ecx
- dec ecx
- jnz y_loop
-
-end_y_loop:
- pop edi
- pop esi
- pop ebx
- mov esp, ebp
- pop ebp
- ret
- }
-}
-
-//****************************************************************
-//
-// ******** Textures mirror/clamp/wrap ********
-//
-//****************************************************************
-
-//****************************************************************
-//8b textures mirror/clamp/wrap
-//****************************************************************
-
-extern "C" void __declspec(naked) asmMirror8bS (int tex, int start, int width, int height, int mask, int line, int full, int count)
-{
- _asm{
- ALIGN 4
-
- push ebp
- mov ebp, esp
- push ebx
- push esi
- push edi
-
- mov edi,[start]
- mov ecx,[height]
-loop_y:
-
- xor edx,edx
-loop_x:
- mov esi,[tex]
- mov ebx,[width]
- add ebx,edx
- and ebx,[width]
- jnz is_mirrored
-
- mov eax,edx
- and eax,[mask]
- add esi,eax
- mov al,[esi]
- mov [edi],al
- inc edi
- jmp end_mirror_check
-is_mirrored:
- add esi,[mask]
- mov eax,edx
- and eax,[mask]
- sub esi,eax
- mov al,[esi]
- mov [edi],al
- inc edi
-end_mirror_check:
-
- inc edx
- cmp edx,[count]
- jne loop_x
-
- add edi,[line]
- mov eax,[tex]
- add eax,[full]
- mov [tex],eax
-
- dec ecx
- jnz loop_y
-
- pop edi
- pop esi
- pop ebx
- mov esp, ebp
- pop ebp
- ret
- }
-}
-
-extern "C" void __declspec(naked) asmWrap8bS (int tex, int start, int height, int mask, int line, int full, int count)
-{
- _asm {
- align 4
- push ebp
- mov ebp, esp
- push ebx
- push esi
- push edi
-
- mov edi,[start]
- mov ecx,[height]
-loop_y:
-
- xor edx,edx
-loop_x:
-
- mov esi,[tex]
- mov eax,edx
- and eax,[mask]
- shl eax,2
- add esi,eax
- mov eax,[esi]
- mov [edi],eax
- add edi,4
-
- inc edx
- cmp edx,[count]
- jne loop_x
-
- add edi,[line]
- mov eax,[tex]
- add eax,[full]
- mov [tex],eax
-
- dec ecx
- jnz loop_y
-
- pop edi
- pop esi
- pop ebx
- mov esp, ebp
- pop ebp
- ret
- }
-}
-
-extern "C" void __declspec(naked) asmClamp8bS (int tex, int constant, int height,int line, int full, int count)
-{
- _asm {
- align 4
- push ebp
- mov ebp, esp
- push ebx
- push esi
- push edi
-
- mov esi,[constant]
- mov edi,[tex]
-
- mov ecx,[height]
-y_loop:
-
- mov al,[esi]
-
- mov edx,[count]
-x_loop:
-
- mov [edi],al // don't unroll or make dword, it may go into next line (doesn't have to be multiple of two)
- inc edi
-
- dec edx
- jnz x_loop
-
- add esi,[full]
- add edi,[line]
-
- dec ecx
- jnz y_loop
-
- pop edi
- pop esi
- pop ebx
- mov esp, ebp
- pop ebp
- ret
- }
-}
-
-//****************************************************************
-//16b textures mirror/clamp/wrap
-//****************************************************************
-
-extern "C" void __declspec(naked) asmMirror16bS (int tex, int start, int width, int height, int mask, int line, int full, int count)
-{
- _asm {
- align 4
- push ebp
- mov ebp, esp
- push ebx
- push esi
- push edi
-
- mov edi,[start]
- mov ecx,[height]
-loop_y:
-
- xor edx,edx
-loop_x:
- mov esi,[tex]
- mov ebx,[width]
- add ebx,edx
- and ebx,[width]
- jnz is_mirrored
-
- mov eax,edx
- shl eax,1
- and eax,[mask]
- add esi,eax
- mov ax,[esi]
- mov [edi],ax
- add edi,2
- jmp end_mirror_check
-is_mirrored:
- add esi,[mask]
- mov eax,edx
- shl eax,1
- and eax,[mask]
- sub esi,eax
- mov ax,[esi]
- mov [edi],ax
- add edi,2
-end_mirror_check:
-
- inc edx
- cmp edx,[count]
- jne loop_x
-
- add edi,[line]
- mov eax,[tex]
- add eax,[full]
- mov [tex],eax
-
- dec ecx
- jnz loop_y
-
- pop edi
- pop esi
- pop ebx
- mov esp, ebp
- pop ebp
- ret
- }
-}
-
-extern "C" void __declspec(naked) asmWrap16bS (int tex, int start, int height, int mask, int line, int full, int count)
-{
- _asm {
- align 4
- push ebp
- mov ebp, esp
- push ebx
- push esi
- push edi
-
- mov edi,[start]
- mov ecx,[height]
-loop_y:
-
- xor edx,edx
-loop_x:
-
- mov esi,[tex]
- mov eax,edx
- and eax,[mask]
- shl eax,2
- add esi,eax
- mov eax,[esi]
- mov [edi],eax
- add edi,4
-
- inc edx
- cmp edx,[count]
- jne loop_x
-
- add edi,[line]
- mov eax,[tex]
- add eax,[full]
- mov [tex],eax
-
- dec ecx
- jnz loop_y
-
- pop edi
- pop esi
- pop ebx
- mov esp, ebp
- pop ebp
- ret
- }
-}
-
-extern "C" void __declspec(naked) asmClamp16bS (int tex, int constant, int height,int line, int full, int count)
-{
- _asm {
- align 4
- push ebp
- mov ebp, esp
- push ebx
- push esi
- push edi
-
- mov esi,[constant]
- mov edi,[tex]
-
- mov ecx,[height]
-y_loop:
-
- mov ax,[esi]
-
- mov edx,[count]
-x_loop:
-
- mov [edi],ax // don't unroll or make dword, it may go into next line (doesn't have to be multiple of two)
- add edi,2
-
- dec edx
- jnz x_loop
-
- add esi,[full]
- add edi,[line]
-
- dec ecx
- jnz y_loop
-
- pop edi
- pop esi
- pop ebx
- mov esp, ebp
- pop ebp
- ret
- }
-}
-
-//****************************************************************
-//32b textures mirror/clamp/wrap
-//****************************************************************
-
-extern "C" void __declspec(naked) asmMirror32bS (int tex, int start, int width, int height, int mask, int line, int full, int count)
-{
- _asm {
- align 4
- push ebp
- mov ebp, esp
- push ebx
- push esi
- push edi
-
- mov edi,[start]
- mov ecx,[height]
-loop_y:
-
- xor edx,edx
-loop_x:
- mov esi,[tex]
- mov ebx,[width]
- add ebx,edx
- and ebx,[width]
- jnz is_mirrored
-
- mov eax,edx
- shl eax,2
- and eax,[mask]
- add esi,eax
- mov eax,[esi]
- mov [edi],eax
- add edi,4
- jmp end_mirror_check
-is_mirrored:
- add esi,[mask]
- mov eax,edx
- shl eax,2
- and eax,[mask]
- sub esi,eax
- mov eax,[esi]
- mov [edi],eax
- add edi,4
-end_mirror_check:
-
- inc edx
- cmp edx,[count]
- jne loop_x
-
- add edi,[line]
- mov eax,[tex]
- add eax,[full]
- mov [tex],eax
-
- dec ecx
- jnz loop_y
-
- pop edi
- pop esi
- pop ebx
- mov esp, ebp
- pop ebp
- ret
- }
-}
-
-extern "C" void __declspec(naked) asmWrap32bS (int tex, int start, int height, int mask, int line, int full, int count)
-{
- _asm {
- align 4
- push ebp
- mov ebp, esp
- push ebx
- push esi
- push edi
-
- mov edi,[start]
- mov ecx,[height]
-loop_y:
-
- xor edx,edx
-loop_x:
-
- mov esi,[tex]
- mov eax,edx
- and eax,[mask]
- shl eax,2
- add esi,eax
- mov eax,[esi]
- mov [edi],eax
- add edi,4
-
- inc edx
- cmp edx,[count]
- jne loop_x
-
- add edi,[line]
- mov eax,[tex]
- add eax,[full]
- mov [tex],eax
-
- dec ecx
- jnz loop_y
-
- pop edi
- pop esi
- pop ebx
- mov esp, ebp
- pop ebp
- ret
- }
-}
-
-extern "C" void __declspec(naked) asmClamp32bS (int tex, int constant, int height,int line, int full, int count)
-{
- _asm {
- align 4
- push ebp
- mov ebp, esp
- push ebx
- push esi
- push edi
-
- mov esi,[constant]
- mov edi,[tex]
-
- mov ecx,[height]
-y_loop:
-
- mov eax,[esi]
-
- mov edx,[count]
-x_loop:
-
- mov [edi],eax // don't unroll or make dword, it may go into next line (doesn't have to be multiple of two)
- add edi,4
-
- dec edx
- jnz x_loop
-
- add esi,[full]
- add edi,[line]
-
- dec ecx
- jnz y_loop
-
- pop edi
- pop esi
- pop ebx
- mov esp, ebp
- pop ebp
- ret
- }
-}
-
-//****************************************************************
-//
-// ******** Textures conversion ********
-//
-//****************************************************************
-
-extern "C" void __declspec(naked) asmTexConv_ARGB1555_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int isize)
-{
- _asm {
- align 4
- push ebp
- mov ebp, esp
- push ebx
- push esi
- push edi
-
- mov esi,[src]
- mov edi,[dst]
- mov ecx,[isize]
-
-tc1_loop:
- mov eax,[esi]
- add esi,4
-
- // arrr rrgg gggb bbbb
- // aaaa rrrr gggg bbbb
- mov edx,eax
- and eax,0x80008000
- mov ebx,eax // ebx = 0xa000000000000000
- shr eax,1
- or ebx,eax // ebx = 0xaa00000000000000
- shr eax,1
- or ebx,eax // ebx = 0xaaa0000000000000
- shr eax,1
- or ebx,eax // ebx = 0xaaaa000000000000
-
- mov eax,edx
- and eax,0x78007800 // eax = 0x0rrrr00000000000
- shr eax,3 // eax = 0x0000rrrr00000000
- or ebx,eax // ebx = 0xaaaarrrr00000000
-
- mov eax,edx
- and eax,0x03c003c0 // eax = 0x000000gggg000000
- shr eax,2 // eax = 0x00000000gggg0000
- or ebx,eax // ebx = 0xaaaarrrrgggg0000
-
- and edx,0x001e001e // edx = 0x00000000000bbbb0
- shr edx,1 // edx = 0x000000000000bbbb
- or ebx,edx // ebx = 0xaaaarrrrggggbbbb
-
- mov [edi],ebx
- add edi,4
-
- dec ecx
- jnz tc1_loop
-
- pop edi
- pop esi
- pop ebx
- mov esp, ebp
- pop ebp
- ret
- }
-}
-
-extern "C" void __declspec(naked) asmTexConv_AI88_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int isize)
-{
- _asm {
- align 4
- push ebp
- mov ebp, esp
- push ebx
- push esi
- push edi
-
- mov esi,[src]
- mov edi,[dst]
- mov ecx,[isize]
-
-tc1_loop:
- mov eax,[esi]
- add esi,4
-
- // aaaa aaaa iiii iiii
- // aaaa rrrr gggg bbbb
- mov edx,eax
- and eax,0xF000F000 // eax = 0xaaaa000000000000
- mov ebx,eax // ebx = 0xaaaa000000000000
-
- and edx,0x00F000F0 // edx = 0x00000000iiii0000
- shl edx,4 // edx = 0x0000iiii00000000
- or ebx,edx // ebx = 0xaaaaiiii00000000
- shr edx,4 // edx = 0x00000000iiii0000
- or ebx,edx // ebx = 0xaaaaiiiiiiii0000
- shr edx,4 // edx = 0x000000000000iiii
- or ebx,edx // ebx = 0xaaaaiiiiiiiiiiii
-
- mov [edi],ebx
- add edi,4
-
- dec ecx
- jnz tc1_loop
-
- pop edi
- pop esi
- pop ebx
- mov esp, ebp
- pop ebp
- ret
- }
-}
-
-extern "C" void __declspec(naked) asmTexConv_AI44_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int isize)
-{
- _asm {
- align 4
- push ebp
- mov ebp, esp
- push ebx
- push esi
- push edi
-
- mov esi,[src]
- mov edi,[dst]
- mov ecx,[isize]
-
-tc1_loop:
- mov eax,[esi]
- add esi,4
-
- // aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0
- // aaaa1 rrrr1 gggg1 bbbb1 aaaa0 rrrr0 gggg0 bbbb0
- // aaaa3 rrrr3 gggg3 bbbb3 aaaa2 rrrr2 gggg2 bbbb2
- mov edx,eax // eax = aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0
- shl eax,16 // eax = aaaa1 iiii1 aaaa0 iiii0 0000 0000 0000 0000
- and eax,0xFF000000 // eax = aaaa1 iiii1 0000 0000 0000 0000 0000 0000
- mov ebx,eax // ebx = aaaa1 iiii1 0000 0000 0000 0000 0000 0000
- and eax,0x0F000000 // eax = 0000 iiii1 0000 0000 0000 0000 0000 0000
- shr eax,4 // eax = 0000 0000 iiii1 0000 0000 0000 0000 0000
- or ebx,eax // ebx = aaaa1 iiii1 iiii1 0000 0000 0000 0000 0000
- shr eax,4 // eax = 0000 0000 0000 iiii1 0000 0000 0000 0000
- or ebx,eax // ebx = aaaa1 iiii1 iiii1 iiii1 0000 0000 0000 0000
-
- mov eax,edx // eax = aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0
- shl eax,8 // eax = aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0 0000 0000
- and eax,0x0000FF00 // eax = 0000 0000 0000 0000 aaaa0 iiii0 0000 0000
- or ebx,eax // ebx = aaaa1 iiii1 iiii1 iiii1 aaaa0 iiii0 0000 0000
- and eax,0x00000F00 // eax = 0000 0000 0000 0000 0000 iiii0 0000 0000
- shr eax,4 // eax = 0000 0000 0000 0000 0000 0000 iiii0 0000
- or ebx,eax // ebx = aaaa1 iiii1 iiii1 iiii1 aaaa0 iiii0 iiii0 0000
- shr eax,4 // eax = 0000 0000 0000 0000 0000 0000 0000 iiii0
- or ebx,eax // ebx = aaaa1 iiii1 iiii1 iiii1 aaaa0 iiii0 iiii0 iiii0
-
- mov [edi],ebx
- add edi,4
-
- mov eax,edx // eax = aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0
- and eax,0xFF000000 // eax = aaaa3 iiii3 0000 0000 0000 0000 0000 0000
- mov ebx,eax // ebx = aaaa3 iiii3 0000 0000 0000 0000 0000 0000
- and eax,0x0F000000 // eax = 0000 iiii3 0000 0000 0000 0000 0000 0000
- shr eax,4 // eax = 0000 0000 iiii3 0000 0000 0000 0000 0000
- or ebx,eax // ebx = aaaa3 iiii3 iiii3 0000 0000 0000 0000 0000
- shr eax,4 // eax = 0000 0000 0000 iiii3 0000 0000 0000 0000
- or ebx,eax // ebx = aaaa3 iiii3 iiii3 iiii3 0000 0000 0000 0000
-
- // edx = aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0
- shr edx,8 // edx = 0000 0000 aaaa3 aaaa3 aaaa2 iiii2 aaaa1 iiii1
- and edx,0x0000FF00 // edx = 0000 0000 0000 0000 aaaa2 iiii2 0000 0000
- or ebx,edx // ebx = aaaa3 iiii3 iiii3 iiii3 aaaa2 iiii2 0000 0000
- and edx,0x00000F00 // edx = 0000 0000 0000 0000 0000 iiii2 0000 0000
- shr edx,4 // edx = 0000 0000 0000 0000 0000 0000 iiii2 0000
- or ebx,edx // ebx = aaaa3 iiii3 iiii3 iiii3 aaaa2 iiii2 iiii2 0000
- shr edx,4 // edx = 0000 0000 0000 0000 0000 0000 0000 iiii2
- or ebx,edx // ebx = aaaa3 iiii3 iiii3 iiii3 aaaa2 iiii2 iiii2 iiii2
-
- mov [edi],ebx
- add edi,4
-
- dec ecx
- jnz tc1_loop
-
- pop edi
- pop esi
- pop ebx
- mov esp, ebp
- pop ebp
- ret
- }
-}
-
-extern "C" void __declspec(naked) asmTexConv_A8_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int isize)
-{
- _asm {
- align 4
- push ebp
- mov ebp, esp
- push ebx
- push esi
- push edi
-
- mov esi,[src]
- mov edi,[dst]
- mov ecx,[isize]
-
-tc1_loop:
- mov eax,[esi]
- add esi,4
-
- // aaaa3 aaaa3 aaaa2 aaaa2 aaaa1 aaaa1 aaaa0 aaaa0
- // aaaa1 rrrr1 gggg1 bbbb1 aaaa0 rrrr0 gggg0 bbbb0
- // aaaa3 rrrr3 gggg3 bbbb3 aaaa2 rrrr2 gggg2 bbbb2
- mov edx,eax
- and eax,0x0000F000 // eax = 00 00 00 00 a1 00 00 00
- shl eax,16 // eax = a1 00 00 00 00 00 00 00
- mov ebx,eax // ebx = a1 00 00 00 00 00 00 00
- shr eax,4
- or ebx,eax // ebx = a1 a1 00 00 00 00 00 00
- shr eax,4
- or ebx,eax // ebx = a1 a1 a1 00 00 00 00 00
- shr eax,4
- or ebx,eax // ebx = a1 a1 a1 a1 00 00 00 00
-
- mov eax,edx
- and eax,0x000000F0 // eax = 00 00 00 00 00 00 a0 00
- shl eax,8 // eax = 00 00 00 00 a0 00 00 00
- or ebx,eax
- shr eax,4
- or ebx,eax
- shr eax,4
- or ebx,eax
- shr eax,4
- or ebx,eax // ebx = a1 a1 a1 a1 a0 a0 a0 a0
-
- mov [edi],ebx
- add edi,4
-
- mov eax,edx // eax = a3 a3 a2 a2 a1 a1 a0 a0
- and eax,0xF0000000 // eax = a3 00 00 00 00 00 00 00
- mov ebx,eax // ebx = a3 00 00 00 00 00 00 00
- shr eax,4
- or ebx,eax // ebx = a3 a3 00 00 00 00 00 00
- shr eax,4
- or ebx,eax // ebx = a3 a3 a3 00 00 00 00 00
- shr eax,4
- or ebx,eax // ebx = a3 a3 a3 a3 00 00 00 00
-
- and edx,0x00F00000 // eax = 00 00 a2 00 00 00 00 00
- shr edx,8 // eax = 00 00 00 00 a2 00 00 00
- or ebx,edx
- shr edx,4
- or ebx,edx
- shr edx,4
- or ebx,edx
- shr edx,4
- or ebx,edx // ebx = a3 a3 a3 a3 a2 a2 a2 a2
-
- mov [edi],ebx
- add edi,4
-
- dec ecx
- jnz tc1_loop
-
- pop edi
- pop esi
- pop ebx
- mov esp, ebp
- pop ebp
- ret
- }
-}
-
-//****************************************************************
-//
-// ******** Tmem functions ********
-//
-//****************************************************************
-
-//****************************************************************
-// CopyBlock - copies a block from base_addr+offset to dest_addr, while unswapping the
-// data within.
-//
-// edi = dest_addr -> end of dest
-// ecx = num_words
-// esi = base_addr (preserved)
-// edx = offset (preserved)
-//****************************************************************
-void __declspec(naked) CopyBlock ( void )
-{
- _asm {
- align 4
- push ebp
- mov ebp, esp
- push eax
- push ebx
- push esi
- push edx
-
- or ecx,ecx
- jz near copyblock_end
-
- push ecx
-
- // first, set the source address and check if not on a dword boundary
- push esi
- push edx
- mov ebx,edx
- and edx,0FFFFFFFCh
- add esi,edx
-
- and ebx,3 // ebx = # we DON'T need to copy
- jz copyblock_copy
-
- mov edx,4 // ecx = # we DO need to copy
- sub edx,ebx
-
- // load the first word, accounting for swapping
-
- mov eax,[esi]
- add esi,4
-copyblock_precopy_skip:
- rol eax,8
- dec ebx
- jnz copyblock_precopy_skip
-
-copyblock_precopy_copy:
- rol eax,8
- mov [edi],al
- inc edi
- dec edx
- jnz copyblock_precopy_copy
-
- mov eax,[esi]
- add esi,4
- bswap eax
- mov [edi],eax
- add edi,4
-
- dec ecx // 1 less word to copy
- jz copyblock_postcopy
-
-copyblock_copy:
- mov eax,[esi]
- bswap eax
- mov [edi],eax
-
- mov eax,[esi+4]
- bswap eax
- mov [edi+4],eax
-
- add esi,8
- add edi,8
-
- dec ecx
- jnz copyblock_copy
-
-copyblock_postcopy:
- pop edx
- pop esi
- pop ecx
-
- // check again if on dword boundary
- mov ebx,edx // ebx = # we DO need to copy
-
- and ebx,3
- jz copyblock_end
-
- shl ecx,3 // ecx = num_words * 8
- add edx,ecx
- and edx,0FFFFFFFCh
- add esi,edx
-
- mov eax,[esi]
-
-copyblock_postcopy_copy:
- rol eax,8
- mov [edi],al
- inc edi
- dec ebx
- jnz copyblock_postcopy_copy
-
-copyblock_end:
- pop edx
- pop esi
- pop ebx
- pop eax
- mov esp, ebp
- pop ebp
- ret
- }
-}
-
-extern "C" __declspec(naked) void SwapBlock32 ( void )
-{
-//****************************************************************
-// SwapBlock - swaps every other 32-bit word at addr
-//
-// ecx = num_words -> 0
-// edi = addr -> end of dest
-//****************************************************************
- _asm {
- align 4
- push ebp
- mov ebp, esp
- push eax
- push ebx
- or ecx,ecx
- jz swapblock32_end
-swapblock32_loop:
- mov eax,[edi]
- mov ebx,[edi+4]
- mov [edi],ebx
- mov [edi+4],eax
- add edi,8
- dec ecx
- jnz swapblock32_loop
-swapblock32_end:
- pop ebx
- pop eax
- mov esp, ebp
- pop ebp
- ret
- }
-}
-
-//****************************************************************
-//
-// ******** Load block/tile ********
-//
-//****************************************************************
-
-extern "C" __declspec(naked) void asmLoadBlock(int src, int dst, int off, int dxt, int cnt, wxUIntPtr swp)
-{
- _asm {
- align 4
- push ebp
- mov ebp, esp
-
- push ebx
- push esi
- push edi
-
- // copy the data
- mov esi,[src]
- mov edi,[dst]
- mov ecx,[cnt]
- mov edx,[off]
- call CopyBlock
-
- // now swap it
- mov eax,[cnt] // eax = count remaining
- xor edx,edx // edx = dxt counter
- mov edi,[dst]
- mov ebx,[dxt]
-
- xor ecx,ecx // ecx = how much to copy
-dxt_test:
- add edi,8
- dec eax
- jz end_dxt_test
- add edx,ebx
- jns dxt_test
-
-dxt_s_test:
- inc ecx
- dec eax
- jz end_dxt_test
- add edx,ebx
- js dxt_s_test
-
- // swap this data (ecx set, dst set)
- call [swp] // (ecx reset to 0 after)
-
- jmp dxt_test // and repeat
-
-end_dxt_test:
- // swap any remaining data
- call [swp]
-
- pop edi
- pop esi
- pop ebx
- mov esp, ebp
- pop ebp
- ret
- }
-}
-
-extern "C" __declspec(naked) void asmLoadTile(int src, int dst, int width, int height, int line, int off, int end)
-{
- _asm {
- align 4
- push ebp
- mov ebp, esp
-
- push ebx
- push esi
- push edi
-
- // set initial values
- mov edi,[dst]
- mov ecx,[width]
- mov esi,[src]
- mov edx,[off]
- xor ebx,ebx // swap this line?
- mov eax,[height]
-
-loadtile_loop:
- cmp [end],edi // end of tmem: error
- jc loadtile_end
-
- // copy this line
- push edi
- push ecx
- call CopyBlock
- pop ecx
-
- // swap it?
- xor ebx,1
- jnz loadtile_no_swap
-
- // (ecx set, restore edi)
- pop edi
- push ecx
- call SwapBlock32
- pop ecx
- jmp loadtile_swap_end
-loadtile_no_swap:
- add sp,4 // forget edi, we are already at the next position
-loadtile_swap_end:
-
- add edx,[line]
-
- dec eax
- jnz loadtile_loop
-
-loadtile_end:
-
- pop edi
- pop esi
- pop ebx
- mov esp, ebp
- pop ebp
- ret
- }
-}
-
-
-//****************************************************************
-//
-// ******** Texture CRC ********
-//
-//****************************************************************
-extern "C" __declspec(naked) int asmTextureCRC(int addr, int width, int height, int line)
-{
- _asm {
- align 4
- push ebp
- mov ebp, esp
-
- push ebx
- push edi
-
- xor eax,eax // eax is final result
- mov ebx,[line]
- mov ecx,[height] // ecx is height counter
- mov edi,[addr] // edi is ptr to texture memory
-crc_loop_y:
- push ecx
-
- mov ecx,[width]
-crc_loop_x:
-
- add eax,[edi] // MUST be 64-bit aligned, so manually unroll
- add eax,[edi+4]
- mov edx,ecx
- mul edx
- add eax,edx
- add edi,8
-
- dec ecx
- jnz crc_loop_x
-
- pop ecx
-
- mov edx,ecx
- mul edx
- add eax,edx
-
- add edi,ebx
-
- dec ecx
- jnz crc_loop_y
-
- pop edi
- pop ebx
- mov esp, ebp
- pop ebp
- ret
- }
-}
diff --git a/Source/Glide64/rdp.cpp b/Source/Glide64/rdp.cpp
index e7397e08b..52213cf07 100644
--- a/Source/Glide64/rdp.cpp
+++ b/Source/Glide64/rdp.cpp
@@ -48,8 +48,38 @@
#include "FBtoScreen.h"
#include "CRC.h"
-extern "C" void SwapBlock32 ();
-extern "C" void SwapBlock64 ();
+extern "C" __declspec(naked) void SwapBlock32 ( void )
+{
+//****************************************************************
+// SwapBlock - swaps every other 32-bit word at addr
+//
+// ecx = num_words -> 0
+// edi = addr -> end of dest
+//****************************************************************
+ _asm {
+ align 4
+ push ebp
+ mov ebp, esp
+ push eax
+ push ebx
+ or ecx,ecx
+ jz swapblock32_end
+swapblock32_loop:
+ mov eax,[edi]
+ mov ebx,[edi+4]
+ mov [edi],ebx
+ mov [edi+4],eax
+ add edi,8
+ dec ecx
+ jnz swapblock32_loop
+swapblock32_end:
+ pop ebx
+ pop eax
+ mov esp, ebp
+ pop ebp
+ ret
+ }
+}
const int NumOfFormats = 3;
SCREEN_SHOT_FORMAT ScreenShotFormats[NumOfFormats] = { {wxT("BMP"), wxT("bmp"), wxBITMAP_TYPE_BMP}, {wxT("PNG"), wxT("png"), wxBITMAP_TYPE_PNG}, {wxT("JPEG"), wxT("jpeg"), wxBITMAP_TYPE_JPEG} };
@@ -1824,7 +1854,168 @@ void setTBufTex(wxUint16 t_mem, wxUint32 cnt)
}
}
-extern "C" void asmLoadBlock(int src, int dst, int off, int dxt, int cnt, int swp);
+void __declspec(naked) CopyBlock ( void )
+{
+ _asm {
+ align 4
+ push ebp
+ mov ebp, esp
+ push eax
+ push ebx
+ push esi
+ push edx
+
+ or ecx,ecx
+ jz near copyblock_end
+
+ push ecx
+
+ // first, set the source address and check if not on a dword boundary
+ push esi
+ push edx
+ mov ebx,edx
+ and edx,0FFFFFFFCh
+ add esi,edx
+
+ and ebx,3 // ebx = # we DON'T need to copy
+ jz copyblock_copy
+
+ mov edx,4 // ecx = # we DO need to copy
+ sub edx,ebx
+
+ // load the first word, accounting for swapping
+
+ mov eax,[esi]
+ add esi,4
+copyblock_precopy_skip:
+ rol eax,8
+ dec ebx
+ jnz copyblock_precopy_skip
+
+copyblock_precopy_copy:
+ rol eax,8
+ mov [edi],al
+ inc edi
+ dec edx
+ jnz copyblock_precopy_copy
+
+ mov eax,[esi]
+ add esi,4
+ bswap eax
+ mov [edi],eax
+ add edi,4
+
+ dec ecx // 1 less word to copy
+ jz copyblock_postcopy
+
+copyblock_copy:
+ mov eax,[esi]
+ bswap eax
+ mov [edi],eax
+
+ mov eax,[esi+4]
+ bswap eax
+ mov [edi+4],eax
+
+ add esi,8
+ add edi,8
+
+ dec ecx
+ jnz copyblock_copy
+
+copyblock_postcopy:
+ pop edx
+ pop esi
+ pop ecx
+
+ // check again if on dword boundary
+ mov ebx,edx // ebx = # we DO need to copy
+
+ and ebx,3
+ jz copyblock_end
+
+ shl ecx,3 // ecx = num_words * 8
+ add edx,ecx
+ and edx,0FFFFFFFCh
+ add esi,edx
+
+ mov eax,[esi]
+
+copyblock_postcopy_copy:
+ rol eax,8
+ mov [edi],al
+ inc edi
+ dec ebx
+ jnz copyblock_postcopy_copy
+
+copyblock_end:
+ pop edx
+ pop esi
+ pop ebx
+ pop eax
+ mov esp, ebp
+ pop ebp
+ ret
+ }
+}
+
+extern "C" __declspec(naked) void asmLoadBlock(int src, int dst, int off, int dxt, int cnt, wxUIntPtr swp)
+{
+ _asm {
+ align 4
+ push ebp
+ mov ebp, esp
+
+ push ebx
+ push esi
+ push edi
+
+ // copy the data
+ mov esi,[src]
+ mov edi,[dst]
+ mov ecx,[cnt]
+ mov edx,[off]
+ call CopyBlock
+
+ // now swap it
+ mov eax,[cnt] // eax = count remaining
+ xor edx,edx // edx = dxt counter
+ mov edi,[dst]
+ mov ebx,[dxt]
+
+ xor ecx,ecx // ecx = how much to copy
+dxt_test:
+ add edi,8
+ dec eax
+ jz end_dxt_test
+ add edx,ebx
+ jns dxt_test
+
+dxt_s_test:
+ inc ecx
+ dec eax
+ jz end_dxt_test
+ add edx,ebx
+ js dxt_s_test
+
+ // swap this data (ecx set, dst set)
+ call [swp] // (ecx reset to 0 after)
+
+ jmp dxt_test // and repeat
+
+end_dxt_test:
+ // swap any remaining data
+ call [swp]
+
+ pop edi
+ pop esi
+ pop ebx
+ mov esp, ebp
+ pop ebp
+ ret
+ }
+}
+
void LoadBlock32b(wxUint32 tile, wxUint32 ul_s, wxUint32 ul_t, wxUint32 lr_s, wxUint32 dxt);
static void rdp_loadblock()
{
@@ -1916,7 +2107,65 @@ static void rdp_loadblock()
setTBufTex(rdp.tiles[tile].t_mem, cnt);
}
-extern "C" void asmLoadTile(int src, int dst, int width, int height, int line, int off, int end);
+extern "C" __declspec(naked) void asmLoadTile(int src, int dst, int width, int height, int line, int off, int end)
+{
+ _asm {
+ align 4
+ push ebp
+ mov ebp, esp
+
+ push ebx
+ push esi
+ push edi
+
+ // set initial values
+ mov edi,[dst]
+ mov ecx,[width]
+ mov esi,[src]
+ mov edx,[off]
+ xor ebx,ebx // swap this line?
+ mov eax,[height]
+
+loadtile_loop:
+ cmp [end],edi // end of tmem: error
+ jc loadtile_end
+
+ // copy this line
+ push edi
+ push ecx
+ call CopyBlock
+ pop ecx
+
+ // swap it?
+ xor ebx,1
+ jnz loadtile_no_swap
+
+ // (ecx set, restore edi)
+ pop edi
+ push ecx
+ call SwapBlock32
+ pop ecx
+ jmp loadtile_swap_end
+loadtile_no_swap:
+ add sp,4 // forget edi, we are already at the next position
+loadtile_swap_end:
+
+ add edx,[line]
+
+ dec eax
+ jnz loadtile_loop
+
+loadtile_end:
+
+ pop edi
+ pop esi
+ pop ebx
+ mov esp, ebp
+ pop ebp
+ ret
+ }
+}
+
void LoadTile32b (wxUint32 tile, wxUint32 ul_s, wxUint32 ul_t, wxUint32 width, wxUint32 height);
static void rdp_loadtile()
{