/* * Glide64 - Glide video plugin for Nintendo 64 emulators. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ /**************************************************************** Glide64 - Glide Plugin for Nintendo 64 emulators Project started on December 29th, 2001 Authors: Dave2001, original author, founded the project in 2001, left it in 2002 Gugaman, joined the project in 2002, left it in 2002 Sergey 'Gonetz' Lipski, joined the project in 2002, main author since fall of 2002 Hiroshi 'KoolSmoky' Morii, joined the project in 2007 **************************************************************** To modify Glide64: * Write your name and (optional)email, commented by your work, so I know who did it, and so that you can find which parts you modified when it comes time to send it to me. * Do NOT send me the whole project or file that you modified. Take out your modified code sections, and tell me where to put them. If people sent the whole thing, I would have many different versions, but no idea how to combine them all. **************************************************************** */ #include "Gfx #1.3.h" /**************************************************************** ******** Textures load ******** ****************************************************************/ /***************************************************************** 4b textures load *****************************************************************/ /**************************************************************** Size: 0, Format: 2 2009 ported to NASM - Sergey (Gonetz) Lipski *****************************************************************/ extern "C" void __declspec(naked) asmLoad4bCI (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext, wxUIntPtr pal) { _asm { push ebp mov ebp, esp push ebx push esi push edi mov ebx,[pal] mov esi,[src] mov edi,[dst] mov ecx,[height] y_loop: push ecx mov ecx,[wid_64] x_loop: push ecx mov eax,[esi] // read all 8 pixels bswap eax add esi,4 mov edx,eax // 1st dword output { shr eax,23 and eax,0x1E mov cx,[ebx+eax] ror cx,1 shl ecx,16 mov eax,edx shr eax,27 and eax,0x1E mov cx,[ebx+eax] ror cx,1 mov [edi],ecx add edi,4 // } // 2nd dword output { mov eax,edx shr eax,15 and eax,0x1E mov cx,[ebx+eax] ror cx,1 shl ecx,16 mov eax,edx shr eax,19 and eax,0x1E mov cx,[ebx+eax] ror cx,1 mov [edi],ecx add edi,4 // } // 3rd dword output { mov eax,edx shr eax,7 and eax,0x1E mov cx,[ebx+eax] ror cx,1 shl ecx,16 mov eax,edx shr eax,11 and eax,0x1E mov cx,[ebx+eax] ror cx,1 mov [edi],ecx add edi,4 // } // 4th dword output { mov eax,edx shl eax,1 and eax,0x1E mov cx,[ebx+eax] ror cx,1 shl ecx,16 shr edx,3 and edx,0x1E mov cx,[ebx+edx] ror cx,1 mov [edi],ecx add edi,4 // } // * copy mov eax,[esi] // read all 8 pixels bswap eax add esi,4 mov edx,eax // 1st dword output { shr eax,23 and eax,0x1E mov cx,[ebx+eax] ror cx,1 shl ecx,16 mov eax,edx shr eax,27 and eax,0x1E mov cx,[ebx+eax] ror cx,1 mov [edi],ecx add edi,4 // } // 2nd dword output { mov eax,edx shr eax,15 and eax,0x1E mov cx,[ebx+eax] ror cx,1 shl ecx,16 mov eax,edx shr eax,19 and eax,0x1E mov cx,[ebx+eax] ror cx,1 mov [edi],ecx add edi,4 // } // 3rd dword output { mov eax,edx shr eax,7 and eax,0x1E mov cx,[ebx+eax] ror cx,1 shl ecx,16 mov eax,edx shr eax,11 and eax,0x1E mov cx,[ebx+eax] ror cx,1 mov [edi],ecx add edi,4 // } // 4th dword output { mov eax,edx shl eax,1 and eax,0x1E mov cx,[ebx+eax] ror cx,1 shl ecx,16 shr edx,3 and edx,0x1E mov cx,[ebx+edx] ror cx,1 mov [edi],ecx add edi,4 // } // * pop ecx dec ecx jnz x_loop pop ecx dec ecx jz near end_y_loop push ecx mov eax,esi add eax,[line] mov esi,[src] sub eax,esi and eax,0x7FF add esi,eax add edi,[ext] mov ecx,[wid_64] x_loop_2: push ecx mov eax,[esi+4] // read all 8 pixels bswap eax mov edx,eax // 1st dword output { shr eax,23 and eax,0x1E mov cx,[ebx+eax] ror cx,1 shl ecx,16 mov eax,edx shr eax,27 and eax,0x1E mov cx,[ebx+eax] ror cx,1 mov [edi],ecx add edi,4 // } // 2nd dword output { mov eax,edx shr eax,15 and eax,0x1E mov cx,[ebx+eax] ror cx,1 shl ecx,16 mov eax,edx shr eax,19 and eax,0x1E mov cx,[ebx+eax] ror cx,1 mov [edi],ecx add edi,4 // } // 3rd dword output { mov eax,edx shr eax,7 and eax,0x1E mov cx,[ebx+eax] ror cx,1 shl ecx,16 mov eax,edx shr eax,11 and eax,0x1E mov cx,[ebx+eax] ror cx,1 mov [edi],ecx add edi,4 // } // 4th dword output { mov eax,edx shl eax,1 and eax,0x1E mov cx,[ebx+eax] ror cx,1 shl ecx,16 shr edx,3 and edx,0x1E mov cx,[ebx+edx] ror cx,1 mov [edi],ecx add edi,4 // } // * copy mov eax,[esi] // read all 8 pixels bswap eax mov edx,esi add edx,8 mov esi,[src] sub edx,esi and edx,0x7FF add esi,edx mov edx,eax // 1st dword output { shr eax,23 and eax,0x1E mov cx,[ebx+eax] ror cx,1 shl ecx,16 mov eax,edx shr eax,27 and eax,0x1E mov cx,[ebx+eax] ror cx,1 mov [edi],ecx add edi,4 // } // 2nd dword output { mov eax,edx shr eax,15 and eax,0x1E mov cx,[ebx+eax] ror cx,1 shl ecx,16 mov eax,edx shr eax,19 and eax,0x1E mov cx,[ebx+eax] ror cx,1 mov [edi],ecx add edi,4 // } // 3rd dword output { mov eax,edx shr eax,7 and eax,0x1E mov cx,[ebx+eax] ror cx,1 shl ecx,16 mov eax,edx shr eax,11 and eax,0x1E mov cx,[ebx+eax] ror cx,1 mov [edi],ecx add edi,4 // } // 4th dword output { mov eax,edx shl eax,1 and eax,0x1E mov cx,[ebx+eax] ror cx,1 shl ecx,16 shr edx,3 and edx,0x1E mov cx,[ebx+edx] ror cx,1 mov [edi],ecx add edi,4 // } // * pop ecx dec ecx jnz x_loop_2 mov eax,esi add eax,[line] mov esi,[src] sub eax,esi and eax,0x7FF add esi,eax add edi,[ext] pop ecx dec ecx jnz y_loop end_y_loop: pop edi pop esi pop ebx mov esp, ebp pop ebp ret } } extern "C" void __declspec(naked) asmLoad4bIAPal (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext, wxUIntPtr pal) { _asm { push ebp mov ebp, esp push ebx push esi push edi mov ebx,[pal] mov esi,[src] mov edi,[dst] mov ecx,[height] y_loop: push ecx mov ecx,[wid_64] x_loop: push ecx mov eax,[esi] // read all 8 pixels bswap eax add esi,4 mov edx,eax // 1st dword output { shr eax,23 and eax,0x1E mov cx,[ebx+eax] ror cx,8 shl ecx,16 mov eax,edx shr eax,27 and eax,0x1E mov cx,[ebx+eax] ror cx,8 mov [edi],ecx add edi,4 // } // 2nd dword output { mov eax,edx shr eax,15 and eax,0x1E mov cx,[ebx+eax] ror cx,8 shl ecx,16 mov eax,edx shr eax,19 and eax,0x1E mov cx,[ebx+eax] ror cx,8 mov [edi],ecx add edi,4 // } // 3rd dword output { mov eax,edx shr eax,7 and eax,0x1E mov cx,[ebx+eax] ror cx,8 shl ecx,16 mov eax,edx shr eax,11 and eax,0x1E mov cx,[ebx+eax] ror cx,8 mov [edi],ecx add edi,4 // } // 4th dword output { mov eax,edx shl eax,1 and eax,0x1E mov cx,[ebx+eax] ror cx,8 shl ecx,16 shr edx,3 and edx,0x1E mov cx,[ebx+edx] ror cx,8 mov [edi],ecx add edi,4 // } // * copy mov eax,[esi] // read all 8 pixels bswap eax add esi,4 mov edx,eax // 1st dword output { shr eax,23 and eax,0x1E mov cx,[ebx+eax] ror cx,8 shl ecx,16 mov eax,edx shr eax,27 and eax,0x1E mov cx,[ebx+eax] ror cx,8 mov [edi],ecx add edi,4 // } // 2nd dword output { mov eax,edx shr eax,15 and eax,0x1E mov cx,[ebx+eax] ror cx,8 shl ecx,16 mov eax,edx shr eax,19 and eax,0x1E mov cx,[ebx+eax] ror cx,8 mov [edi],ecx add edi,4 // } // 3rd dword output { mov eax,edx shr eax,7 and eax,0x1E mov cx,[ebx+eax] ror cx,8 shl ecx,16 mov eax,edx shr eax,11 and eax,0x1E mov cx,[ebx+eax] ror cx,8 mov [edi],ecx add edi,4 // } // 4th dword output { mov eax,edx shl eax,1 and eax,0x1E mov cx,[ebx+eax] ror cx,8 shl ecx,16 shr edx,3 and edx,0x1E mov cx,[ebx+edx] ror cx,8 mov [edi],ecx add edi,4 // } // * pop ecx dec ecx jnz x_loop pop ecx dec ecx jz near end_y_loop push ecx mov eax,esi add eax,[line] mov esi,[src] sub eax,esi and eax,0x7FF add esi,eax add edi,[ext] mov ecx,[wid_64] x_loop_2: push ecx mov eax,[esi+4] // read all 8 pixels bswap eax mov edx,eax // 1st dword output { shr eax,23 and eax,0x1E mov cx,[ebx+eax] ror cx,8 shl ecx,16 mov eax,edx shr eax,27 and eax,0x1E mov cx,[ebx+eax] ror cx,8 mov [edi],ecx add edi,4 // } // 2nd dword output { mov eax,edx shr eax,15 and eax,0x1E mov cx,[ebx+eax] ror cx,8 shl ecx,16 mov eax,edx shr eax,19 and eax,0x1E mov cx,[ebx+eax] ror cx,8 mov [edi],ecx add edi,4 // } // 3rd dword output { mov eax,edx shr eax,7 and eax,0x1E mov cx,[ebx+eax] ror cx,8 shl ecx,16 mov eax,edx shr eax,11 and eax,0x1E mov cx,[ebx+eax] ror cx,8 mov [edi],ecx add edi,4 // } // 4th dword output { mov eax,edx shl eax,1 and eax,0x1E mov cx,[ebx+eax] ror cx,8 shl ecx,16 shr edx,3 and edx,0x1E mov cx,[ebx+edx] ror cx,8 mov [edi],ecx add edi,4 // } // * copy mov eax,[esi] // read all 8 pixels bswap eax mov edx,esi add edx,8 mov esi,[src] sub edx,esi and edx,0x7FF add esi,edx mov edx,eax // 1st dword output { shr eax,23 and eax,0x1E mov cx,[ebx+eax] ror cx,8 shl ecx,16 mov eax,edx shr eax,27 and eax,0x1E mov cx,[ebx+eax] ror cx,8 mov [edi],ecx add edi,4 // } // 2nd dword output { mov eax,edx shr eax,15 and eax,0x1E mov cx,[ebx+eax] ror cx,8 shl ecx,16 mov eax,edx shr eax,19 and eax,0x1E mov cx,[ebx+eax] ror cx,8 mov [edi],ecx add edi,4 // } // 3rd dword output { mov eax,edx shr eax,7 and eax,0x1E mov cx,[ebx+eax] ror cx,8 shl ecx,16 mov eax,edx shr eax,11 and eax,0x1E mov cx,[ebx+eax] ror cx,8 mov [edi],ecx add edi,4 // } // 4th dword output { mov eax,edx shl eax,1 and eax,0x1E mov cx,[ebx+eax] ror cx,8 shl ecx,16 shr edx,3 and edx,0x1E mov cx,[ebx+edx] ror cx,8 mov [edi],ecx add edi,4 // } // * pop ecx dec ecx jnz x_loop_2 mov eax,esi add eax,[line] mov esi,[src] sub eax,esi and eax,0x7FF add esi,eax add edi,[ext] pop ecx dec ecx jnz y_loop end_y_loop: pop edi pop esi pop ebx mov esp, ebp pop ebp ret } } /***************************************************************** Size: 0, Format: 3 ** BY GUGAMAN ** 2009 ported to NASM - Sergey (Gonetz) Lipski *****************************************************************/ extern "C" void __declspec(naked) asmLoad4bIA (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext) { _asm { push ebp mov ebp, esp push ebx push esi push edi mov esi,[src] mov edi,[dst] mov ecx,[height] y_loop: push ecx mov ecx,[wid_64] x_loop: push ecx mov eax,[esi] // read all 8 pixels bswap eax add esi,4 mov edx,eax // 1st dword { xor ecx,ecx // pixel #1 // IIIAxxxxxxxxxxxxxxxxxxxxxxxxxxxx // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII mov eax,edx shr eax,24 //Alpha and eax,0x00000010 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shr eax,28 // Intensity and eax,0x0000000E or ecx,eax shr eax,3 or ecx,eax // pixel #2 // xxxxIIIAxxxxxxxxxxxxxxxxxxxxxxxx // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx mov eax,edx shr eax,12 //Alpha and eax,0x00001000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shr eax,16 // Intensity and eax,0x00000E00 or ecx,eax shr eax,3 and eax,0x00000100 or ecx,eax // pixel #3 // xxxxxxxxIIIAxxxxxxxxxxxxxxxxxxxx // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx //Alpha mov eax,edx and eax,0x00100000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shr eax,4 // Intensity and eax,0x000E0000 or ecx,eax shr eax,3 and eax,0x00010000 or ecx,eax // pixel #4 // xxxxxxxxxxxxIIIAxxxxxxxxxxxxxxxx // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx mov eax,edx shl eax,12 //Alpha and eax,0x10000000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shl eax,8 // Intensity and eax,0x0E000000 or ecx,eax shr eax,3 and eax,0x01000000 or ecx,eax mov [edi],ecx add edi,4 // } // 2nd dword { xor ecx,ecx // pixel #5 // xxxxxxxxxxxxxxxxIIIAxxxxxxxxxxxx // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII mov eax,edx shr eax,8 //Alpha and eax,0x00000010 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shr eax,12 // Intensity and eax,0x0000000E or ecx,eax shr eax,3 or ecx,eax // pixel #6 // xxxxxxxxxxxxxxxxxxxxIIIAxxxxxxxx // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx //Alpha mov eax,edx shl eax,4 and eax,0x00001000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx // Intensity and eax,0x00000E00 or ecx,eax shr eax,3 and eax,0x00000100 or ecx,eax // pixel #7 // xxxxxxxxxxxxxxxxxxxxxxxxIIIAxxxx // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx //Alpha mov eax,edx shl eax,16 and eax,0x00100000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shl eax,12 // Intensity and eax,0x000E0000 or ecx,eax shr eax,3 and eax,0x00010000 or ecx,eax // pixel #8 // xxxxxxxxxxxxxxxxxxxxxxxxxxxxIIIA // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx mov eax,edx shl eax,28 //Alpha and eax,0x10000000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shl eax,24 // Intensity and eax,0x0E000000 or ecx,eax shr eax,3 and eax,0x01000000 or ecx,eax mov [edi],ecx add edi,4 // } // * copy mov eax,[esi] // read all 8 pixels bswap eax add esi,4 mov edx,eax // 1st dword { xor ecx,ecx // pixel #1 // IIIAxxxxxxxxxxxxxxxxxxxxxxxxxxxx // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII mov eax,edx shr eax,24 //Alpha and eax,0x00000010 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shr eax,28 // Intensity and eax,0x0000000E or ecx,eax shr eax,3 or ecx,eax // pixel #2 // xxxxIIIAxxxxxxxxxxxxxxxxxxxxxxxx // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx mov eax,edx shr eax,12 //Alpha and eax,0x00001000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shr eax,16 // Intensity and eax,0x00000E00 or ecx,eax shr eax,3 and eax,0x00000100 or ecx,eax // pixel #3 // xxxxxxxxIIIAxxxxxxxxxxxxxxxxxxxx // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx //Alpha mov eax,edx and eax,0x00100000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shr eax,4 // Intensity and eax,0x000E0000 or ecx,eax shr eax,3 and eax,0x00010000 or ecx,eax // pixel #4 // xxxxxxxxxxxxIIIAxxxxxxxxxxxxxxxx // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx mov eax,edx shl eax,12 //Alpha and eax,0x10000000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shl eax,8 // Intensity and eax,0x0E000000 or ecx,eax shr eax,3 and eax,0x01000000 or ecx,eax mov [edi],ecx add edi,4 // } // 2nd dword { xor ecx,ecx // pixel #5 // xxxxxxxxxxxxxxxxIIIAxxxxxxxxxxxx // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII mov eax,edx shr eax,8 //Alpha and eax,0x00000010 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shr eax,12 // Intensity and eax,0x0000000E or ecx,eax shr eax,3 or ecx,eax // pixel #6 // xxxxxxxxxxxxxxxxxxxxIIIAxxxxxxxx // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx //Alpha mov eax,edx shl eax,4 and eax,0x00001000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx // Intensity and eax,0x00000E00 or ecx,eax shr eax,3 and eax,0x00000100 or ecx,eax // pixel #7 // xxxxxxxxxxxxxxxxxxxxxxxxIIIAxxxx // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx //Alpha mov eax,edx shl eax,16 and eax,0x00100000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shl eax,12 // Intensity and eax,0x000E0000 or ecx,eax shr eax,3 and eax,0x00010000 or ecx,eax // pixel #8 // xxxxxxxxxxxxxxxxxxxxxxxxxxxxIIIA // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx mov eax,edx shl eax,28 //Alpha and eax,0x10000000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shl eax,24 // Intensity and eax,0x0E000000 or ecx,eax shr eax,3 and eax,0x01000000 or ecx,eax mov [edi],ecx add edi,4 // } // * pop ecx dec ecx jnz x_loop pop ecx dec ecx jz near end_y_loop push ecx add esi,[line] add edi,[ext] mov ecx,[wid_64] x_loop_2: push ecx mov eax,[esi+4] // read all 8 pixels bswap eax mov edx,eax // 1st dword { xor ecx,ecx // pixel #1 // IIIAxxxxxxxxxxxxxxxxxxxxxxxxxxxx // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII mov eax,edx shr eax,24 //Alpha and eax,0x00000010 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shr eax,28 // Intensity and eax,0x0000000E or ecx,eax shr eax,3 or ecx,eax // pixel #2 // xxxxIIIAxxxxxxxxxxxxxxxxxxxxxxxx // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx mov eax,edx shr eax,12 //Alpha and eax,0x00001000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shr eax,16 // Intensity and eax,0x00000E00 or ecx,eax shr eax,3 and eax,0x00000100 or ecx,eax // pixel #3 // xxxxxxxxIIIAxxxxxxxxxxxxxxxxxxxx // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx //Alpha mov eax,edx and eax,0x00100000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shr eax,4 // Intensity and eax,0x000E0000 or ecx,eax shr eax,3 and eax,0x00010000 or ecx,eax // pixel #4 // xxxxxxxxxxxxIIIAxxxxxxxxxxxxxxxx // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx mov eax,edx shl eax,12 //Alpha and eax,0x10000000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shl eax,8 // Intensity and eax,0x0E000000 or ecx,eax shr eax,3 and eax,0x01000000 or ecx,eax mov [edi],ecx add edi,4 // } // 2nd dword { xor ecx,ecx // pixel #5 // xxxxxxxxxxxxxxxxIIIAxxxxxxxxxxxx // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII mov eax,edx shr eax,8 //Alpha and eax,0x00000010 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shr eax,12 // Intensity and eax,0x0000000E or ecx,eax shr eax,3 or ecx,eax // pixel #6 // xxxxxxxxxxxxxxxxxxxxIIIAxxxxxxxx // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx //Alpha mov eax,edx shl eax,4 and eax,0x00001000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx // Intensity and eax,0x00000E00 or ecx,eax shr eax,3 and eax,0x00000100 or ecx,eax // pixel #7 // xxxxxxxxxxxxxxxxxxxxxxxxIIIAxxxx // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx //Alpha mov eax,edx shl eax,16 and eax,0x00100000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shl eax,12 // Intensity and eax,0x000E0000 or ecx,eax shr eax,3 and eax,0x00010000 or ecx,eax // pixel #8 // xxxxxxxxxxxxxxxxxxxxxxxxxxxxIIIA // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx mov eax,edx shl eax,28 //Alpha and eax,0x10000000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shl eax,24 // Intensity and eax,0x0E000000 or ecx,eax shr eax,3 and eax,0x01000000 or ecx,eax mov [edi],ecx add edi,4 // } // * copy mov eax,[esi] // read all 8 pixels bswap eax add esi,8 mov edx,eax // 1st dword { xor ecx,ecx // pixel #1 // IIIAxxxxxxxxxxxxxxxxxxxxxxxxxxxx // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII mov eax,edx shr eax,24 //Alpha and eax,0x00000010 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shr eax,28 // Intensity and eax,0x0000000E or ecx,eax shr eax,3 or ecx,eax // pixel #2 // xxxxIIIAxxxxxxxxxxxxxxxxxxxxxxxx // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx mov eax,edx shr eax,12 //Alpha and eax,0x00001000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shr eax,16 // Intensity and eax,0x00000E00 or ecx,eax shr eax,3 and eax,0x00000100 or ecx,eax // pixel #3 // xxxxxxxxIIIAxxxxxxxxxxxxxxxxxxxx // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx //Alpha mov eax,edx and eax,0x00100000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shr eax,4 // Intensity and eax,0x000E0000 or ecx,eax shr eax,3 and eax,0x00010000 or ecx,eax // pixel #4 // xxxxxxxxxxxxIIIAxxxxxxxxxxxxxxxx // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx mov eax,edx shl eax,12 //Alpha and eax,0x10000000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shl eax,8 // Intensity and eax,0x0E000000 or ecx,eax shr eax,3 and eax,0x01000000 or ecx,eax mov [edi],ecx add edi,4 // } // 2nd dword { xor ecx,ecx // pixel #5 // xxxxxxxxxxxxxxxxIIIAxxxxxxxxxxxx // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII mov eax,edx shr eax,8 //Alpha and eax,0x00000010 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shr eax,12 // Intensity and eax,0x0000000E or ecx,eax shr eax,3 or ecx,eax // pixel #6 // xxxxxxxxxxxxxxxxxxxxIIIAxxxxxxxx // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx //Alpha mov eax,edx shl eax,4 and eax,0x00001000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx // Intensity and eax,0x00000E00 or ecx,eax shr eax,3 and eax,0x00000100 or ecx,eax // pixel #7 // xxxxxxxxxxxxxxxxxxxxxxxxIIIAxxxx // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx //Alpha mov eax,edx shl eax,16 and eax,0x00100000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shl eax,12 // Intensity and eax,0x000E0000 or ecx,eax shr eax,3 and eax,0x00010000 or ecx,eax // pixel #8 // xxxxxxxxxxxxxxxxxxxxxxxxxxxxIIIA // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx mov eax,edx shl eax,28 //Alpha and eax,0x10000000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shl eax,24 // Intensity and eax,0x0E000000 or ecx,eax shr eax,3 and eax,0x01000000 or ecx,eax mov [edi],ecx add edi,4 // } // * pop ecx dec ecx jnz x_loop_2 add esi,[line] add edi,[ext] pop ecx dec ecx jnz y_loop end_y_loop: pop edi pop esi pop ebx mov esp, ebp pop ebp ret } } //**************************************************************** // Size: 0, Format: 4 // 2009 ported to NASM - Sergey (Gonetz) Lipski extern "C" void __declspec(naked) asmLoad4bI (wxUIntPtr src, int dst, wxUIntPtr wid_64, int height, int line, int ext) { _asm { push ebp mov ebp, esp push ebx push esi push edi mov esi,[src] mov edi,[dst] mov ecx,[height] y_loop: push ecx mov ecx,[wid_64] x_loop: push ecx mov eax,[esi] // read all 8 pixels bswap eax add esi,4 mov edx,eax // 1st dword { xor ecx,ecx shr eax,28 // 0xF0000000 -> 0x0000000F or ecx,eax shl eax,4 or ecx,eax mov eax,edx // 0x0F000000 -> 0x00000F00 shr eax,16 and eax,0x00000F00 or ecx,eax shl eax,4 or ecx,eax mov eax,edx shr eax,4 // 0x00F00000 -> 0x000F0000 and eax,0x000F0000 or ecx,eax shl eax,4 or ecx,eax mov eax,edx shl eax,8 // 0x000F0000 -> 0x0F000000 and eax,0x0F000000 or ecx,eax shl eax,4 or ecx,eax mov [edi],ecx add edi,4 // } // 2nd dword { xor ecx,ecx mov eax,edx shr eax,12 // 0x0000F000 -> 0x0000000F and eax,0x0000000F or ecx,eax shl eax,4 or ecx,eax mov eax,edx // 0x00000F00 -> 0x00000F00 and eax,0x00000F00 or ecx,eax shl eax,4 or ecx,eax mov eax,edx shl eax,12 // 0x000000F0 -> 0x000F0000 and eax,0x000F0000 or ecx,eax shl eax,4 or ecx,eax shl edx,24 // 0x0000000F -> 0x0F000000 and edx,0x0F000000 or ecx,edx shl edx,4 or ecx,edx mov [edi],ecx add edi,4 // } // * copy mov eax,[esi] // read all 8 pixels bswap eax add esi,4 mov edx,eax // 1st dword { xor ecx,ecx shr eax,28 // 0xF0000000 -> 0x0000000F or ecx,eax shl eax,4 or ecx,eax mov eax,edx // 0x0F000000 -> 0x00000F00 shr eax,16 and eax,0x00000F00 or ecx,eax shl eax,4 or ecx,eax mov eax,edx shr eax,4 // 0x00F00000 -> 0x000F0000 and eax,0x000F0000 or ecx,eax shl eax,4 or ecx,eax mov eax,edx shl eax,8 // 0x000F0000 -> 0x0F000000 and eax,0x0F000000 or ecx,eax shl eax,4 or ecx,eax mov [edi],ecx add edi,4 // } // 2nd dword { xor ecx,ecx mov eax,edx shr eax,12 // 0x0000F000 -> 0x0000000F and eax,0x0000000F or ecx,eax shl eax,4 or ecx,eax mov eax,edx // 0x00000F00 -> 0x00000F00 and eax,0x00000F00 or ecx,eax shl eax,4 or ecx,eax mov eax,edx shl eax,12 // 0x000000F0 -> 0x000F0000 and eax,0x000F0000 or ecx,eax shl eax,4 or ecx,eax shl edx,24 // 0x0000000F -> 0x0F000000 and edx,0x0F000000 or ecx,edx shl edx,4 or ecx,edx mov [edi],ecx add edi,4 // } // * pop ecx dec ecx jnz x_loop pop ecx dec ecx jz near end_y_loop push ecx add esi,[line] add edi,[ext] mov ecx,[wid_64] x_loop_2: push ecx mov eax,[esi+4] // read all 8 pixels bswap eax mov edx,eax // 1st dword { xor ecx,ecx shr eax,28 // 0xF0000000 -> 0x0000000F or ecx,eax shl eax,4 or ecx,eax mov eax,edx // 0x0F000000 -> 0x00000F00 shr eax,16 and eax,0x00000F00 or ecx,eax shl eax,4 or ecx,eax mov eax,edx shr eax,4 // 0x00F00000 -> 0x000F0000 and eax,0x000F0000 or ecx,eax shl eax,4 or ecx,eax mov eax,edx shl eax,8 // 0x000F0000 -> 0x0F000000 and eax,0x0F000000 or ecx,eax shl eax,4 or ecx,eax mov [edi],ecx add edi,4 // } // 2nd dword { xor ecx,ecx mov eax,edx shr eax,12 // 0x0000F000 -> 0x0000000F and eax,0x0000000F or ecx,eax shl eax,4 or ecx,eax mov eax,edx // 0x00000F00 -> 0x00000F00 and eax,0x00000F00 or ecx,eax shl eax,4 or ecx,eax mov eax,edx shl eax,12 // 0x000000F0 -> 0x000F0000 and eax,0x000F0000 or ecx,eax shl eax,4 or ecx,eax shl edx,24 // 0x0000000F -> 0x0F000000 and edx,0x0F000000 or ecx,edx shl edx,4 or ecx,edx mov [edi],ecx add edi,4 // } // * copy mov eax,[esi] // read all 8 pixels bswap eax add esi,8 mov edx,eax // 1st dword { xor ecx,ecx shr eax,28 // 0xF0000000 -> 0x0000000F or ecx,eax shl eax,4 or ecx,eax mov eax,edx // 0x0F000000 -> 0x00000F00 shr eax,16 and eax,0x00000F00 or ecx,eax shl eax,4 or ecx,eax mov eax,edx shr eax,4 // 0x00F00000 -> 0x000F0000 and eax,0x000F0000 or ecx,eax shl eax,4 or ecx,eax mov eax,edx shl eax,8 // 0x000F0000 -> 0x0F000000 and eax,0x0F000000 or ecx,eax shl eax,4 or ecx,eax mov [edi],ecx add edi,4 // } // 2nd dword { xor ecx,ecx mov eax,edx shr eax,12 // 0x0000F000 -> 0x0000000F and eax,0x0000000F or ecx,eax shl eax,4 or ecx,eax mov eax,edx // 0x00000F00 -> 0x00000F00 and eax,0x00000F00 or ecx,eax shl eax,4 or ecx,eax mov eax,edx shl eax,12 // 0x000000F0 -> 0x000F0000 and eax,0x000F0000 or ecx,eax shl eax,4 or ecx,eax shl edx,24 // 0x0000000F -> 0x0F000000 and edx,0x0F000000 or ecx,edx shl edx,4 or ecx,edx mov [edi],ecx add edi,4 // } // * pop ecx dec ecx jnz x_loop_2 add esi,[line] add edi,[ext] pop ecx dec ecx jnz y_loop end_y_loop: pop edi pop esi pop ebx mov esp, ebp pop ebp ret } } //**************************************************************** //8b textures load //**************************************************************** //**************************************************************** // Size: 1, Format: 2 // // 2008.03.29 cleaned up - H.Morii // 2009 ported to NASM - Sergey (Gonetz) Lipski extern "C" void __declspec(naked) asmLoad8bCI (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext, wxUIntPtr pal) { _asm { push ebp mov ebp, esp push ebx push esi push edi mov ebx,[pal] mov esi,[src] mov edi,[dst] mov ecx,[height] y_loop: push ecx mov ecx,[wid_64] x_loop: push ecx mov eax,[esi] // read all 4 pixels bswap eax add esi,4 mov edx,eax // 1st dword output { shr eax,15 and eax,0x1FE mov cx,[ebx+eax] ror cx,1 shl ecx,16 mov eax,edx shr eax,23 and eax,0x1FE mov cx,[ebx+eax] ror cx,1 mov [edi],ecx add edi,4 // } // 2nd dword output { mov eax,edx shl eax,1 and eax,0x1FE mov cx,[ebx+eax] ror cx,1 shl ecx,16 shr edx,7 and edx,0x1FE mov cx,[ebx+edx] ror cx,1 mov [edi],ecx add edi,4 // } // * copy mov eax,[esi] // read all 4 pixels bswap eax add esi,4 mov edx,eax // 1st dword output { shr eax,15 and eax,0x1FE mov cx,[ebx+eax] ror cx,1 shl ecx,16 mov eax,edx shr eax,23 and eax,0x1FE mov cx,[ebx+eax] ror cx,1 mov [edi],ecx add edi,4 // } // 2nd dword output { mov eax,edx shl eax,1 and eax,0x1FE mov cx,[ebx+eax] ror cx,1 shl ecx,16 shr edx,7 and edx,0x1FE mov cx,[ebx+edx] ror cx,1 mov [edi],ecx add edi,4 // } // * pop ecx dec ecx jnz x_loop pop ecx dec ecx jz near end_y_loop push ecx mov eax,esi add eax,[line] mov esi,[src] sub eax,esi and eax,0x7FF add esi,eax add edi,[ext] mov ecx,[wid_64] x_loop_2: push ecx mov eax,[esi+4] // read all 4 pixels bswap eax mov edx,eax // 1st dword output { shr eax,15 and eax,0x1FE mov cx,[ebx+eax] ror cx,1 shl ecx,16 mov eax,edx shr eax,23 and eax,0x1FE mov cx,[ebx+eax] ror cx,1 mov [edi],ecx add edi,4 // } // 2nd dword output { mov eax,edx shl eax,1 and eax,0x1FE mov cx,[ebx+eax] ror cx,1 shl ecx,16 shr edx,7 and edx,0x1FE mov cx,[ebx+edx] ror cx,1 mov [edi],ecx add edi,4 // } // * copy mov eax,[esi] // read all 4 pixels bswap eax mov edx,esi add edx,8 mov esi,[src] sub edx,esi and edx,0x7FF add esi,edx mov edx,eax // 1st dword output { shr eax,15 and eax,0x1FE mov cx,[ebx+eax] ror cx,1 shl ecx,16 mov eax,edx shr eax,23 and eax,0x1FE mov cx,[ebx+eax] ror cx,1 mov [edi],ecx add edi,4 // } // 2nd dword output { mov eax,edx shl eax,1 and eax,0x1FE mov cx,[ebx+eax] ror cx,1 shl ecx,16 shr edx,7 and edx,0x1FE mov cx,[ebx+edx] ror cx,1 mov [edi],ecx add edi,4 // } // * pop ecx dec ecx jnz x_loop_2 mov eax,esi add eax,[line] mov esi,[src] sub eax,esi and eax,0x7FF add esi,eax add edi,[ext] pop ecx dec ecx jnz y_loop end_y_loop: pop edi pop esi pop ebx mov esp, ebp pop ebp ret } } extern "C" void __declspec(naked) asmLoad8bIA8 (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext, wxUIntPtr pal) { _asm { push ebp mov ebp, esp push ebx push esi push edi mov ebx,[pal] mov esi,[src] mov edi,[dst] mov ecx,[height] y_loop: push ecx mov ecx,[wid_64] x_loop: push ecx mov eax,[esi] // read all 4 pixels bswap eax add esi,4 mov edx,eax // 1st dword output { shr eax,15 and eax,0x1FE mov cx,[ebx+eax] ror cx,8 shl ecx,16 mov eax,edx shr eax,23 and eax,0x1FE mov cx,[ebx+eax] ror cx,8 mov [edi],ecx add edi,4 // } // 2nd dword output { mov eax,edx shl eax,1 and eax,0x1FE mov cx,[ebx+eax] ror cx,8 shl ecx,16 shr edx,7 and edx,0x1FE mov cx,[ebx+edx] ror cx,8 mov [edi],ecx add edi,4 // } // * copy mov eax,[esi] // read all 4 pixels bswap eax add esi,4 mov edx,eax // 1st dword output { shr eax,15 and eax,0x1FE mov cx,[ebx+eax] ror cx,8 shl ecx,16 mov eax,edx shr eax,23 and eax,0x1FE mov cx,[ebx+eax] ror cx,8 mov [edi],ecx add edi,4 // } // 2nd dword output { mov eax,edx shl eax,1 and eax,0x1FE mov cx,[ebx+eax] ror cx,8 shl ecx,16 shr edx,7 and edx,0x1FE mov cx,[ebx+edx] ror cx,8 mov [edi],ecx add edi,4 // } // * pop ecx dec ecx jnz x_loop pop ecx dec ecx jz near end_y_loop push ecx add esi,[line] add edi,[ext] mov ecx,[wid_64] x_loop_2: push ecx mov eax,[esi+4] // read all 4 pixels bswap eax mov edx,eax // 1st dword output { shr eax,15 and eax,0x1FE mov cx,[ebx+eax] ror cx,8 shl ecx,16 mov eax,edx shr eax,23 and eax,0x1FE mov cx,[ebx+eax] ror cx,8 mov [edi],ecx add edi,4 // } // 2nd dword output { mov eax,edx shl eax,1 and eax,0x1FE mov cx,[ebx+eax] ror cx,8 shl ecx,16 shr edx,7 and edx,0x1FE mov cx,[ebx+edx] ror cx,8 mov [edi],ecx add edi,4 // } // * copy mov eax,[esi] // read all 4 pixels bswap eax add esi,8 mov edx,eax // 1st dword output { shr eax,15 and eax,0x1FE mov cx,[ebx+eax] ror cx,8 shl ecx,16 mov eax,edx shr eax,23 and eax,0x1FE mov cx,[ebx+eax] ror cx,8 mov [edi],ecx add edi,4 // } // 2nd dword output { mov eax,edx shl eax,1 and eax,0x1FE mov cx,[ebx+eax] ror cx,8 shl ecx,16 shr edx,7 and edx,0x1FE mov cx,[ebx+edx] ror cx,8 mov [edi],ecx add edi,4 // } // * pop ecx dec ecx jnz x_loop_2 add esi,[line] add edi,[ext] pop ecx dec ecx jnz y_loop end_y_loop: pop edi pop esi pop ebx mov esp, ebp pop ebp ret } } //**************************************************************** // Size: 1, Format: 3 // // ** by Gugaman ** // // 2008.03.29 cleaned up - H.Morii // 2009 ported to NASM - Sergey (Gonetz) Lipski extern "C" void __declspec(naked) asmLoad8bIA4 (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext) { _asm { push ebp mov ebp, esp push ebx push esi push edi mov esi,[src] mov edi,[dst] mov ecx,[height] y_loop: push ecx mov ecx,[wid_64] x_loop: mov eax,[esi] // read all 4 pixels mov edx,eax shr eax,4 //all alpha shl edx,4 and eax,0x0F0F0F0F and edx,0xF0F0F0F0 add esi,4 or eax,edx mov [edi],eax // save dword add edi,4 mov eax,[esi] // read all 4 pixels mov edx,eax shr eax,4 //all alpha shl edx,4 and eax,0x0F0F0F0F and edx,0xF0F0F0F0 add esi,4 or eax,edx mov [edi],eax // save dword add edi,4 // * dec ecx jnz x_loop pop ecx dec ecx jz end_y_loop push ecx add esi,[line] add edi,[ext] mov ecx,[wid_64] x_loop_2: mov eax,[esi+4] // read both pixels mov edx,eax shr eax,4 //all alpha shl edx,4 and eax,0x0F0F0F0F and edx,0xF0F0F0F0 or eax,edx mov [edi],eax //save dword add edi,4 mov eax,[esi] // read both pixels add esi,8 mov edx,eax shr eax,4 //all alpha shl edx,4 and eax,0x0F0F0F0F and edx,0xF0F0F0F0 or eax,edx mov [edi],eax //save dword add edi,4 // * dec ecx jnz x_loop_2 add esi,[line] add edi,[ext] pop ecx dec ecx jnz y_loop end_y_loop: pop edi pop esi pop ebx mov esp, ebp pop ebp ret } } //**************************************************************** // Size: 1, Format: 4 // // ** by Gugaman ** // 2009 ported to NASM - Sergey (Gonetz) Lipski extern "C" void __declspec(naked) asmLoad8bI (wxUIntPtr src, int dst, wxUIntPtr wid_64, int height, int line, int ext) { _asm { push ebp mov ebp, esp push ebx push esi push edi mov esi,[src] mov edi,[dst] mov ecx,[height] y_loop: push ecx mov ecx,[wid_64] x_loop: mov eax,[esi] // read all 4 pixels add esi,4 mov [edi],eax // save dword add edi,4 mov eax,[esi] // read all 4 pixels add esi,4 mov [edi],eax // save dword add edi,4 // * dec ecx jnz x_loop pop ecx dec ecx jz end_y_loop push ecx add esi,[line] add edi,[ext] mov ecx,[wid_64] x_loop_2: mov eax,[esi+4] // read both pixels mov [edi],eax //save dword add edi,4 mov eax,[esi] // read both pixels add esi,8 mov [edi],eax //save dword add edi,4 // * dec ecx jnz x_loop_2 add esi,[line] add edi,[ext] pop ecx dec ecx jnz y_loop end_y_loop: pop edi pop esi pop ebx mov esp, ebp pop ebp ret } } //**************************************************************** //16b textures load //**************************************************************** //**************************************************************** // Size: 2, Format: 0 // // 2008.03.29 cleaned up - H.Morii // 2009 ported to NASM - Sergey (Gonetz) Lipski extern "C" void __declspec(naked) asmLoad16bRGBA (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext) { _asm { align 4 push ebp mov ebp,esp push ebx push esi push edi mov esi,[src] mov edi,[dst] mov ecx,[height] y_loop: push ecx mov ecx,[wid_64] x_loop: mov eax,[esi] // read both pixels mov ebx,[esi+4] // read both pixels bswap eax bswap ebx ror ax,1 ror bx,1 ror eax,16 ror ebx,16 ror ax,1 ror bx,1 mov [edi],eax mov [edi+4],ebx add esi,8 add edi,8 dec ecx jnz x_loop pop ecx dec ecx jz end_y_loop push ecx mov eax,esi add eax,[line] mov esi,[src] sub eax, esi and eax, 0xFFF add esi, eax add edi,[ext] mov ecx,[wid_64] x_loop_2: mov eax,[esi+4] // read both pixels mov ebx,[esi] // read both pixels bswap eax bswap ebx ror ax,1 ror bx,1 ror eax,16 ror ebx,16 ror ax,1 ror bx,1 mov [edi],eax mov [edi+4],ebx add esi,8 add edi,8 dec ecx jnz x_loop_2 mov eax,esi add eax,[line] mov esi,[src] sub eax, esi and eax, 0xFFF add esi, eax add edi,[ext] pop ecx dec ecx jnz y_loop end_y_loop: pop edi pop esi pop ebx mov esp, ebp pop ebp ret } } //**************************************************************** // Size: 2, Format: 3 // // ** by Gugaman/Dave2001 ** // // 2008.03.29 cleaned up - H.Morii // 2009 ported to NASM - Sergey (Gonetz) Lipski extern "C" void __declspec(naked) asmLoad16bIA (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext) { _asm { ALIGN 4 push ebp mov ebp, esp push ebx push esi push edi mov esi,[src] mov edi,[dst] mov ecx,[height] y_loop: push ecx mov ecx,[wid_64] x_loop: mov eax,[esi] // read both pixels mov ebx,[esi+4] // read both pixels mov [edi],eax mov [edi+4],ebx add esi,8 add edi,8 dec ecx jnz x_loop pop ecx dec ecx jz end_y_loop push ecx add esi,[line] add edi,[ext] mov ecx,[wid_64] x_loop_2: mov eax,[esi+4] // read both pixels mov ebx,[esi] // read both pixels mov [edi],eax mov [edi+4],ebx add esi,8 add edi,8 dec ecx jnz x_loop_2 add esi,[line] add edi,[ext] pop ecx dec ecx jnz y_loop end_y_loop: pop edi pop esi pop ebx mov esp, ebp pop ebp ret } } //**************************************************************** // // ******** Textures mirror/clamp/wrap ******** // //**************************************************************** //**************************************************************** //8b textures mirror/clamp/wrap //**************************************************************** extern "C" void __declspec(naked) asmMirror8bS (int tex, int start, int width, int height, int mask, int line, int full, int count) { _asm{ ALIGN 4 push ebp mov ebp, esp push ebx push esi push edi mov edi,[start] mov ecx,[height] loop_y: xor edx,edx loop_x: mov esi,[tex] mov ebx,[width] add ebx,edx and ebx,[width] jnz is_mirrored mov eax,edx and eax,[mask] add esi,eax mov al,[esi] mov [edi],al inc edi jmp end_mirror_check is_mirrored: add esi,[mask] mov eax,edx and eax,[mask] sub esi,eax mov al,[esi] mov [edi],al inc edi end_mirror_check: inc edx cmp edx,[count] jne loop_x add edi,[line] mov eax,[tex] add eax,[full] mov [tex],eax dec ecx jnz loop_y pop edi pop esi pop ebx mov esp, ebp pop ebp ret } } extern "C" void __declspec(naked) asmWrap8bS (int tex, int start, int height, int mask, int line, int full, int count) { _asm { align 4 push ebp mov ebp, esp push ebx push esi push edi mov edi,[start] mov ecx,[height] loop_y: xor edx,edx loop_x: mov esi,[tex] mov eax,edx and eax,[mask] shl eax,2 add esi,eax mov eax,[esi] mov [edi],eax add edi,4 inc edx cmp edx,[count] jne loop_x add edi,[line] mov eax,[tex] add eax,[full] mov [tex],eax dec ecx jnz loop_y pop edi pop esi pop ebx mov esp, ebp pop ebp ret } } extern "C" void __declspec(naked) asmClamp8bS (int tex, int constant, int height,int line, int full, int count) { _asm { align 4 push ebp mov ebp, esp push ebx push esi push edi mov esi,[constant] mov edi,[tex] mov ecx,[height] y_loop: mov al,[esi] mov edx,[count] x_loop: mov [edi],al // don't unroll or make dword, it may go into next line (doesn't have to be multiple of two) inc edi dec edx jnz x_loop add esi,[full] add edi,[line] dec ecx jnz y_loop pop edi pop esi pop ebx mov esp, ebp pop ebp ret } } //**************************************************************** //16b textures mirror/clamp/wrap //**************************************************************** extern "C" void __declspec(naked) asmMirror16bS (int tex, int start, int width, int height, int mask, int line, int full, int count) { _asm { align 4 push ebp mov ebp, esp push ebx push esi push edi mov edi,[start] mov ecx,[height] loop_y: xor edx,edx loop_x: mov esi,[tex] mov ebx,[width] add ebx,edx and ebx,[width] jnz is_mirrored mov eax,edx shl eax,1 and eax,[mask] add esi,eax mov ax,[esi] mov [edi],ax add edi,2 jmp end_mirror_check is_mirrored: add esi,[mask] mov eax,edx shl eax,1 and eax,[mask] sub esi,eax mov ax,[esi] mov [edi],ax add edi,2 end_mirror_check: inc edx cmp edx,[count] jne loop_x add edi,[line] mov eax,[tex] add eax,[full] mov [tex],eax dec ecx jnz loop_y pop edi pop esi pop ebx mov esp, ebp pop ebp ret } } extern "C" void __declspec(naked) asmWrap16bS (int tex, int start, int height, int mask, int line, int full, int count) { _asm { align 4 push ebp mov ebp, esp push ebx push esi push edi mov edi,[start] mov ecx,[height] loop_y: xor edx,edx loop_x: mov esi,[tex] mov eax,edx and eax,[mask] shl eax,2 add esi,eax mov eax,[esi] mov [edi],eax add edi,4 inc edx cmp edx,[count] jne loop_x add edi,[line] mov eax,[tex] add eax,[full] mov [tex],eax dec ecx jnz loop_y pop edi pop esi pop ebx mov esp, ebp pop ebp ret } } extern "C" void __declspec(naked) asmClamp16bS (int tex, int constant, int height,int line, int full, int count) { _asm { align 4 push ebp mov ebp, esp push ebx push esi push edi mov esi,[constant] mov edi,[tex] mov ecx,[height] y_loop: mov ax,[esi] mov edx,[count] x_loop: mov [edi],ax // don't unroll or make dword, it may go into next line (doesn't have to be multiple of two) add edi,2 dec edx jnz x_loop add esi,[full] add edi,[line] dec ecx jnz y_loop pop edi pop esi pop ebx mov esp, ebp pop ebp ret } } //**************************************************************** //32b textures mirror/clamp/wrap //**************************************************************** extern "C" void __declspec(naked) asmMirror32bS (int tex, int start, int width, int height, int mask, int line, int full, int count) { _asm { align 4 push ebp mov ebp, esp push ebx push esi push edi mov edi,[start] mov ecx,[height] loop_y: xor edx,edx loop_x: mov esi,[tex] mov ebx,[width] add ebx,edx and ebx,[width] jnz is_mirrored mov eax,edx shl eax,2 and eax,[mask] add esi,eax mov eax,[esi] mov [edi],eax add edi,4 jmp end_mirror_check is_mirrored: add esi,[mask] mov eax,edx shl eax,2 and eax,[mask] sub esi,eax mov eax,[esi] mov [edi],eax add edi,4 end_mirror_check: inc edx cmp edx,[count] jne loop_x add edi,[line] mov eax,[tex] add eax,[full] mov [tex],eax dec ecx jnz loop_y pop edi pop esi pop ebx mov esp, ebp pop ebp ret } } extern "C" void __declspec(naked) asmWrap32bS (int tex, int start, int height, int mask, int line, int full, int count) { _asm { align 4 push ebp mov ebp, esp push ebx push esi push edi mov edi,[start] mov ecx,[height] loop_y: xor edx,edx loop_x: mov esi,[tex] mov eax,edx and eax,[mask] shl eax,2 add esi,eax mov eax,[esi] mov [edi],eax add edi,4 inc edx cmp edx,[count] jne loop_x add edi,[line] mov eax,[tex] add eax,[full] mov [tex],eax dec ecx jnz loop_y pop edi pop esi pop ebx mov esp, ebp pop ebp ret } } extern "C" void __declspec(naked) asmClamp32bS (int tex, int constant, int height,int line, int full, int count) { _asm { align 4 push ebp mov ebp, esp push ebx push esi push edi mov esi,[constant] mov edi,[tex] mov ecx,[height] y_loop: mov eax,[esi] mov edx,[count] x_loop: mov [edi],eax // don't unroll or make dword, it may go into next line (doesn't have to be multiple of two) add edi,4 dec edx jnz x_loop add esi,[full] add edi,[line] dec ecx jnz y_loop pop edi pop esi pop ebx mov esp, ebp pop ebp ret } } //**************************************************************** // // ******** Textures conversion ******** // //**************************************************************** extern "C" void __declspec(naked) asmTexConv_ARGB1555_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int isize) { _asm { align 4 push ebp mov ebp, esp push ebx push esi push edi mov esi,[src] mov edi,[dst] mov ecx,[isize] tc1_loop: mov eax,[esi] add esi,4 // arrr rrgg gggb bbbb // aaaa rrrr gggg bbbb mov edx,eax and eax,0x80008000 mov ebx,eax // ebx = 0xa000000000000000 shr eax,1 or ebx,eax // ebx = 0xaa00000000000000 shr eax,1 or ebx,eax // ebx = 0xaaa0000000000000 shr eax,1 or ebx,eax // ebx = 0xaaaa000000000000 mov eax,edx and eax,0x78007800 // eax = 0x0rrrr00000000000 shr eax,3 // eax = 0x0000rrrr00000000 or ebx,eax // ebx = 0xaaaarrrr00000000 mov eax,edx and eax,0x03c003c0 // eax = 0x000000gggg000000 shr eax,2 // eax = 0x00000000gggg0000 or ebx,eax // ebx = 0xaaaarrrrgggg0000 and edx,0x001e001e // edx = 0x00000000000bbbb0 shr edx,1 // edx = 0x000000000000bbbb or ebx,edx // ebx = 0xaaaarrrrggggbbbb mov [edi],ebx add edi,4 dec ecx jnz tc1_loop pop edi pop esi pop ebx mov esp, ebp pop ebp ret } } extern "C" void __declspec(naked) asmTexConv_AI88_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int isize) { _asm { align 4 push ebp mov ebp, esp push ebx push esi push edi mov esi,[src] mov edi,[dst] mov ecx,[isize] tc1_loop: mov eax,[esi] add esi,4 // aaaa aaaa iiii iiii // aaaa rrrr gggg bbbb mov edx,eax and eax,0xF000F000 // eax = 0xaaaa000000000000 mov ebx,eax // ebx = 0xaaaa000000000000 and edx,0x00F000F0 // edx = 0x00000000iiii0000 shl edx,4 // edx = 0x0000iiii00000000 or ebx,edx // ebx = 0xaaaaiiii00000000 shr edx,4 // edx = 0x00000000iiii0000 or ebx,edx // ebx = 0xaaaaiiiiiiii0000 shr edx,4 // edx = 0x000000000000iiii or ebx,edx // ebx = 0xaaaaiiiiiiiiiiii mov [edi],ebx add edi,4 dec ecx jnz tc1_loop pop edi pop esi pop ebx mov esp, ebp pop ebp ret } } extern "C" void __declspec(naked) asmTexConv_AI44_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int isize) { _asm { align 4 push ebp mov ebp, esp push ebx push esi push edi mov esi,[src] mov edi,[dst] mov ecx,[isize] tc1_loop: mov eax,[esi] add esi,4 // aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0 // aaaa1 rrrr1 gggg1 bbbb1 aaaa0 rrrr0 gggg0 bbbb0 // aaaa3 rrrr3 gggg3 bbbb3 aaaa2 rrrr2 gggg2 bbbb2 mov edx,eax // eax = aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0 shl eax,16 // eax = aaaa1 iiii1 aaaa0 iiii0 0000 0000 0000 0000 and eax,0xFF000000 // eax = aaaa1 iiii1 0000 0000 0000 0000 0000 0000 mov ebx,eax // ebx = aaaa1 iiii1 0000 0000 0000 0000 0000 0000 and eax,0x0F000000 // eax = 0000 iiii1 0000 0000 0000 0000 0000 0000 shr eax,4 // eax = 0000 0000 iiii1 0000 0000 0000 0000 0000 or ebx,eax // ebx = aaaa1 iiii1 iiii1 0000 0000 0000 0000 0000 shr eax,4 // eax = 0000 0000 0000 iiii1 0000 0000 0000 0000 or ebx,eax // ebx = aaaa1 iiii1 iiii1 iiii1 0000 0000 0000 0000 mov eax,edx // eax = aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0 shl eax,8 // eax = aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0 0000 0000 and eax,0x0000FF00 // eax = 0000 0000 0000 0000 aaaa0 iiii0 0000 0000 or ebx,eax // ebx = aaaa1 iiii1 iiii1 iiii1 aaaa0 iiii0 0000 0000 and eax,0x00000F00 // eax = 0000 0000 0000 0000 0000 iiii0 0000 0000 shr eax,4 // eax = 0000 0000 0000 0000 0000 0000 iiii0 0000 or ebx,eax // ebx = aaaa1 iiii1 iiii1 iiii1 aaaa0 iiii0 iiii0 0000 shr eax,4 // eax = 0000 0000 0000 0000 0000 0000 0000 iiii0 or ebx,eax // ebx = aaaa1 iiii1 iiii1 iiii1 aaaa0 iiii0 iiii0 iiii0 mov [edi],ebx add edi,4 mov eax,edx // eax = aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0 and eax,0xFF000000 // eax = aaaa3 iiii3 0000 0000 0000 0000 0000 0000 mov ebx,eax // ebx = aaaa3 iiii3 0000 0000 0000 0000 0000 0000 and eax,0x0F000000 // eax = 0000 iiii3 0000 0000 0000 0000 0000 0000 shr eax,4 // eax = 0000 0000 iiii3 0000 0000 0000 0000 0000 or ebx,eax // ebx = aaaa3 iiii3 iiii3 0000 0000 0000 0000 0000 shr eax,4 // eax = 0000 0000 0000 iiii3 0000 0000 0000 0000 or ebx,eax // ebx = aaaa3 iiii3 iiii3 iiii3 0000 0000 0000 0000 // edx = aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0 shr edx,8 // edx = 0000 0000 aaaa3 aaaa3 aaaa2 iiii2 aaaa1 iiii1 and edx,0x0000FF00 // edx = 0000 0000 0000 0000 aaaa2 iiii2 0000 0000 or ebx,edx // ebx = aaaa3 iiii3 iiii3 iiii3 aaaa2 iiii2 0000 0000 and edx,0x00000F00 // edx = 0000 0000 0000 0000 0000 iiii2 0000 0000 shr edx,4 // edx = 0000 0000 0000 0000 0000 0000 iiii2 0000 or ebx,edx // ebx = aaaa3 iiii3 iiii3 iiii3 aaaa2 iiii2 iiii2 0000 shr edx,4 // edx = 0000 0000 0000 0000 0000 0000 0000 iiii2 or ebx,edx // ebx = aaaa3 iiii3 iiii3 iiii3 aaaa2 iiii2 iiii2 iiii2 mov [edi],ebx add edi,4 dec ecx jnz tc1_loop pop edi pop esi pop ebx mov esp, ebp pop ebp ret } } extern "C" void __declspec(naked) asmTexConv_A8_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int isize) { _asm { align 4 push ebp mov ebp, esp push ebx push esi push edi mov esi,[src] mov edi,[dst] mov ecx,[isize] tc1_loop: mov eax,[esi] add esi,4 // aaaa3 aaaa3 aaaa2 aaaa2 aaaa1 aaaa1 aaaa0 aaaa0 // aaaa1 rrrr1 gggg1 bbbb1 aaaa0 rrrr0 gggg0 bbbb0 // aaaa3 rrrr3 gggg3 bbbb3 aaaa2 rrrr2 gggg2 bbbb2 mov edx,eax and eax,0x0000F000 // eax = 00 00 00 00 a1 00 00 00 shl eax,16 // eax = a1 00 00 00 00 00 00 00 mov ebx,eax // ebx = a1 00 00 00 00 00 00 00 shr eax,4 or ebx,eax // ebx = a1 a1 00 00 00 00 00 00 shr eax,4 or ebx,eax // ebx = a1 a1 a1 00 00 00 00 00 shr eax,4 or ebx,eax // ebx = a1 a1 a1 a1 00 00 00 00 mov eax,edx and eax,0x000000F0 // eax = 00 00 00 00 00 00 a0 00 shl eax,8 // eax = 00 00 00 00 a0 00 00 00 or ebx,eax shr eax,4 or ebx,eax shr eax,4 or ebx,eax shr eax,4 or ebx,eax // ebx = a1 a1 a1 a1 a0 a0 a0 a0 mov [edi],ebx add edi,4 mov eax,edx // eax = a3 a3 a2 a2 a1 a1 a0 a0 and eax,0xF0000000 // eax = a3 00 00 00 00 00 00 00 mov ebx,eax // ebx = a3 00 00 00 00 00 00 00 shr eax,4 or ebx,eax // ebx = a3 a3 00 00 00 00 00 00 shr eax,4 or ebx,eax // ebx = a3 a3 a3 00 00 00 00 00 shr eax,4 or ebx,eax // ebx = a3 a3 a3 a3 00 00 00 00 and edx,0x00F00000 // eax = 00 00 a2 00 00 00 00 00 shr edx,8 // eax = 00 00 00 00 a2 00 00 00 or ebx,edx shr edx,4 or ebx,edx shr edx,4 or ebx,edx shr edx,4 or ebx,edx // ebx = a3 a3 a3 a3 a2 a2 a2 a2 mov [edi],ebx add edi,4 dec ecx jnz tc1_loop pop edi pop esi pop ebx mov esp, ebp pop ebp ret } } //**************************************************************** // // ******** Tmem functions ******** // //**************************************************************** //**************************************************************** // CopyBlock - copies a block from base_addr+offset to dest_addr, while unswapping the // data within. // // edi = dest_addr -> end of dest // ecx = num_words // esi = base_addr (preserved) // edx = offset (preserved) //**************************************************************** void __declspec(naked) CopyBlock ( void ) { _asm { align 4 push ebp mov ebp, esp push eax push ebx push esi push edx or ecx,ecx jz near copyblock_end push ecx // first, set the source address and check if not on a dword boundary push esi push edx mov ebx,edx and edx,0FFFFFFFCh add esi,edx and ebx,3 // ebx = # we DON'T need to copy jz copyblock_copy mov edx,4 // ecx = # we DO need to copy sub edx,ebx // load the first word, accounting for swapping mov eax,[esi] add esi,4 copyblock_precopy_skip: rol eax,8 dec ebx jnz copyblock_precopy_skip copyblock_precopy_copy: rol eax,8 mov [edi],al inc edi dec edx jnz copyblock_precopy_copy mov eax,[esi] add esi,4 bswap eax mov [edi],eax add edi,4 dec ecx // 1 less word to copy jz copyblock_postcopy copyblock_copy: mov eax,[esi] bswap eax mov [edi],eax mov eax,[esi+4] bswap eax mov [edi+4],eax add esi,8 add edi,8 dec ecx jnz copyblock_copy copyblock_postcopy: pop edx pop esi pop ecx // check again if on dword boundary mov ebx,edx // ebx = # we DO need to copy and ebx,3 jz copyblock_end shl ecx,3 // ecx = num_words * 8 add edx,ecx and edx,0FFFFFFFCh add esi,edx mov eax,[esi] copyblock_postcopy_copy: rol eax,8 mov [edi],al inc edi dec ebx jnz copyblock_postcopy_copy copyblock_end: pop edx pop esi pop ebx pop eax mov esp, ebp pop ebp ret } } extern "C" __declspec(naked) void SwapBlock32 ( void ) { //**************************************************************** // SwapBlock - swaps every other 32-bit word at addr // // ecx = num_words -> 0 // edi = addr -> end of dest //**************************************************************** _asm { align 4 push ebp mov ebp, esp push eax push ebx or ecx,ecx jz swapblock32_end swapblock32_loop: mov eax,[edi] mov ebx,[edi+4] mov [edi],ebx mov [edi+4],eax add edi,8 dec ecx jnz swapblock32_loop swapblock32_end: pop ebx pop eax mov esp, ebp pop ebp ret } } //**************************************************************** // // ******** Load block/tile ******** // //**************************************************************** extern "C" __declspec(naked) void asmLoadBlock(int src, int dst, int off, int dxt, int cnt, wxUIntPtr swp) { _asm { align 4 push ebp mov ebp, esp push ebx push esi push edi // copy the data mov esi,[src] mov edi,[dst] mov ecx,[cnt] mov edx,[off] call CopyBlock // now swap it mov eax,[cnt] // eax = count remaining xor edx,edx // edx = dxt counter mov edi,[dst] mov ebx,[dxt] xor ecx,ecx // ecx = how much to copy dxt_test: add edi,8 dec eax jz end_dxt_test add edx,ebx jns dxt_test dxt_s_test: inc ecx dec eax jz end_dxt_test add edx,ebx js dxt_s_test // swap this data (ecx set, dst set) call [swp] // (ecx reset to 0 after) jmp dxt_test // and repeat end_dxt_test: // swap any remaining data call [swp] pop edi pop esi pop ebx mov esp, ebp pop ebp ret } } extern "C" __declspec(naked) void asmLoadTile(int src, int dst, int width, int height, int line, int off, int end) { _asm { align 4 push ebp mov ebp, esp push ebx push esi push edi // set initial values mov edi,[dst] mov ecx,[width] mov esi,[src] mov edx,[off] xor ebx,ebx // swap this line? mov eax,[height] loadtile_loop: cmp [end],edi // end of tmem: error jc loadtile_end // copy this line push edi push ecx call CopyBlock pop ecx // swap it? xor ebx,1 jnz loadtile_no_swap // (ecx set, restore edi) pop edi push ecx call SwapBlock32 pop ecx jmp loadtile_swap_end loadtile_no_swap: add sp,4 // forget edi, we are already at the next position loadtile_swap_end: add edx,[line] dec eax jnz loadtile_loop loadtile_end: pop edi pop esi pop ebx mov esp, ebp pop ebp ret } } //**************************************************************** // // ******** Texture CRC ******** // //**************************************************************** extern "C" __declspec(naked) int asmTextureCRC(int addr, int width, int height, int line) { _asm { align 4 push ebp mov ebp, esp push ebx push edi xor eax,eax // eax is final result mov ebx,[line] mov ecx,[height] // ecx is height counter mov edi,[addr] // edi is ptr to texture memory crc_loop_y: push ecx mov ecx,[width] crc_loop_x: add eax,[edi] // MUST be 64-bit aligned, so manually unroll add eax,[edi+4] mov edx,ecx mul edx add eax,edx add edi,8 dec ecx jnz crc_loop_x pop ecx mov edx,ecx mul edx add eax,edx add edi,ebx dec ecx jnz crc_loop_y pop edi pop ebx mov esp, ebp pop ebp ret } }