diff --git a/Source/Glide64/Glide64.vcproj b/Source/Glide64/Glide64.vcproj index d488e962a..13c16a145 100644 --- a/Source/Glide64/Glide64.vcproj +++ b/Source/Glide64/Glide64.vcproj @@ -270,10 +270,6 @@ RelativePath="TexModCI.h" > - - 0x0000000F + or ecx,eax + shl eax,4 + or ecx,eax + + mov eax,edx // 0x0F000000 -> 0x00000F00 + shr eax,16 + and eax,0x00000F00 + or ecx,eax + shl eax,4 + or ecx,eax + + mov eax,edx + shr eax,4 // 0x00F00000 -> 0x000F0000 + and eax,0x000F0000 + or ecx,eax + shl eax,4 + or ecx,eax + + mov eax,edx + shl eax,8 // 0x000F0000 -> 0x0F000000 + and eax,0x0F000000 + or ecx,eax + shl eax,4 + or ecx,eax + + mov [edi],ecx + add edi,4 + // } + + // 2nd dword { + xor ecx,ecx + mov eax,edx + shr eax,12 // 0x0000F000 -> 0x0000000F + and eax,0x0000000F + or ecx,eax + shl eax,4 + or ecx,eax + + mov eax,edx // 0x00000F00 -> 0x00000F00 + and eax,0x00000F00 + or ecx,eax + shl eax,4 + or ecx,eax + + mov eax,edx + shl eax,12 // 0x000000F0 -> 0x000F0000 + and eax,0x000F0000 + or ecx,eax + shl eax,4 + or ecx,eax + + shl edx,24 // 0x0000000F -> 0x0F000000 + and edx,0x0F000000 + or ecx,edx + shl edx,4 + or ecx,edx + + mov [edi],ecx + add edi,4 + // } + + // * copy + mov eax,[esi] // read all 8 pixels + bswap eax + add esi,4 + mov edx,eax + + // 1st dword { + xor ecx,ecx + shr eax,28 // 0xF0000000 -> 0x0000000F + or ecx,eax + shl eax,4 + or ecx,eax + + mov eax,edx // 0x0F000000 -> 0x00000F00 + shr eax,16 + and eax,0x00000F00 + or ecx,eax + shl eax,4 + or ecx,eax + + mov eax,edx + shr eax,4 // 0x00F00000 -> 0x000F0000 + and eax,0x000F0000 + or ecx,eax + shl eax,4 + or ecx,eax + + mov eax,edx + shl eax,8 // 0x000F0000 -> 0x0F000000 + and eax,0x0F000000 + or ecx,eax + shl eax,4 + or ecx,eax + + mov [edi],ecx + add edi,4 + // } + + // 2nd dword { + xor ecx,ecx + mov eax,edx + shr eax,12 // 0x0000F000 -> 0x0000000F + and eax,0x0000000F + or ecx,eax + shl eax,4 + or ecx,eax + + mov eax,edx // 0x00000F00 -> 0x00000F00 + and eax,0x00000F00 + or ecx,eax + shl eax,4 + or ecx,eax + + mov eax,edx + shl eax,12 // 0x000000F0 -> 0x000F0000 + and eax,0x000F0000 + or ecx,eax + shl eax,4 + or ecx,eax + + shl edx,24 // 0x0000000F -> 0x0F000000 + and edx,0x0F000000 + or ecx,edx + shl edx,4 + or ecx,edx + + mov [edi],ecx + add edi,4 + // } + // * + + pop ecx + dec ecx + jnz x_loop + + pop ecx + dec ecx + jz near end_y_loop + push ecx + + add esi,[line] + add edi,[ext] + + mov ecx,[wid_64] +x_loop_2: + push ecx + + mov eax,[esi+4] // read all 8 pixels + bswap eax + mov edx,eax + + // 1st dword { + xor ecx,ecx + shr eax,28 // 0xF0000000 -> 0x0000000F + or ecx,eax + shl eax,4 + or ecx,eax + + mov eax,edx // 0x0F000000 -> 0x00000F00 + shr eax,16 + and eax,0x00000F00 + or ecx,eax + shl eax,4 + or ecx,eax + + mov eax,edx + shr eax,4 // 0x00F00000 -> 0x000F0000 + and eax,0x000F0000 + or ecx,eax + shl eax,4 + or ecx,eax + + mov eax,edx + shl eax,8 // 0x000F0000 -> 0x0F000000 + and eax,0x0F000000 + or ecx,eax + shl eax,4 + or ecx,eax + + mov [edi],ecx + add edi,4 + // } + + // 2nd dword { + xor ecx,ecx + mov eax,edx + shr eax,12 // 0x0000F000 -> 0x0000000F + and eax,0x0000000F + or ecx,eax + shl eax,4 + or ecx,eax + + mov eax,edx // 0x00000F00 -> 0x00000F00 + and eax,0x00000F00 + or ecx,eax + shl eax,4 + or ecx,eax + + mov eax,edx + shl eax,12 // 0x000000F0 -> 0x000F0000 + and eax,0x000F0000 + or ecx,eax + shl eax,4 + or ecx,eax + + shl edx,24 // 0x0000000F -> 0x0F000000 + and edx,0x0F000000 + or ecx,edx + shl edx,4 + or ecx,edx + + mov [edi],ecx + add edi,4 + // } + + // * copy + mov eax,[esi] // read all 8 pixels + bswap eax + add esi,8 + mov edx,eax + + // 1st dword { + xor ecx,ecx + shr eax,28 // 0xF0000000 -> 0x0000000F + or ecx,eax + shl eax,4 + or ecx,eax + + mov eax,edx // 0x0F000000 -> 0x00000F00 + shr eax,16 + and eax,0x00000F00 + or ecx,eax + shl eax,4 + or ecx,eax + + mov eax,edx + shr eax,4 // 0x00F00000 -> 0x000F0000 + and eax,0x000F0000 + or ecx,eax + shl eax,4 + or ecx,eax + + mov eax,edx + shl eax,8 // 0x000F0000 -> 0x0F000000 + and eax,0x0F000000 + or ecx,eax + shl eax,4 + or ecx,eax + + mov [edi],ecx + add edi,4 + // } + + // 2nd dword { + xor ecx,ecx + mov eax,edx + shr eax,12 // 0x0000F000 -> 0x0000000F + and eax,0x0000000F + or ecx,eax + shl eax,4 + or ecx,eax + + mov eax,edx // 0x00000F00 -> 0x00000F00 + and eax,0x00000F00 + or ecx,eax + shl eax,4 + or ecx,eax + + mov eax,edx + shl eax,12 // 0x000000F0 -> 0x000F0000 + and eax,0x000F0000 + or ecx,eax + shl eax,4 + or ecx,eax + + shl edx,24 // 0x0000000F -> 0x0F000000 + and edx,0x0F000000 + or ecx,edx + shl edx,4 + or ecx,edx + + mov [edi],ecx + add edi,4 + // } + // * + + pop ecx + dec ecx + jnz x_loop_2 + + add esi,[line] + add edi,[ext] + + pop ecx + dec ecx + jnz y_loop + +end_y_loop: + pop edi + pop esi + pop ebx + mov esp, ebp + pop ebp + ret + } +} //**************************************************************** // Size: 0, Format: 2 diff --git a/Source/Glide64/TexLoad8b.h b/Source/Glide64/TexLoad8b.h index d0477877b..388e55e7c 100644 --- a/Source/Glide64/TexLoad8b.h +++ b/Source/Glide64/TexLoad8b.h @@ -37,10 +37,630 @@ // //**************************************************************** -extern "C" void asmLoad8bCI (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext, wxUIntPtr pal); -extern "C" void asmLoad8bIA8 (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext, wxUIntPtr pal); -extern "C" void asmLoad8bIA4 (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext); -extern "C" void asmLoad8bI (wxUIntPtr src, int dst, wxUIntPtr wid_64, int height, int line, int ext); +extern "C" void __declspec(naked) asmLoad8bCI (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext, wxUIntPtr pal) +{ + _asm { + push ebp + mov ebp, esp + push ebx + push esi + push edi + + mov ebx,[pal] + mov esi,[src] + mov edi,[dst] + mov ecx,[height] +y_loop: + push ecx + mov ecx,[wid_64] +x_loop: + push ecx + + mov eax,[esi] // read all 4 pixels + bswap eax + add esi,4 + mov edx,eax + + // 1st dword output { + shr eax,15 + and eax,0x1FE + mov cx,[ebx+eax] + ror cx,1 + shl ecx,16 + + mov eax,edx + shr eax,23 + and eax,0x1FE + mov cx,[ebx+eax] + ror cx,1 + + mov [edi],ecx + add edi,4 + // } + + // 2nd dword output { + mov eax,edx + shl eax,1 + and eax,0x1FE + mov cx,[ebx+eax] + ror cx,1 + shl ecx,16 + + shr edx,7 + and edx,0x1FE + mov cx,[ebx+edx] + ror cx,1 + + mov [edi],ecx + add edi,4 + // } + + // * copy + mov eax,[esi] // read all 4 pixels + bswap eax + add esi,4 + mov edx,eax + + // 1st dword output { + shr eax,15 + and eax,0x1FE + mov cx,[ebx+eax] + ror cx,1 + shl ecx,16 + + mov eax,edx + shr eax,23 + and eax,0x1FE + mov cx,[ebx+eax] + ror cx,1 + + mov [edi],ecx + add edi,4 + // } + + // 2nd dword output { + mov eax,edx + shl eax,1 + and eax,0x1FE + mov cx,[ebx+eax] + ror cx,1 + shl ecx,16 + + shr edx,7 + and edx,0x1FE + mov cx,[ebx+edx] + ror cx,1 + + mov [edi],ecx + add edi,4 + // } + // * + + pop ecx + + dec ecx + jnz x_loop + + pop ecx + dec ecx + jz near end_y_loop + push ecx + + mov eax,esi + add eax,[line] + mov esi,[src] + sub eax,esi + and eax,0x7FF + add esi,eax + add edi,[ext] + + mov ecx,[wid_64] +x_loop_2: + push ecx + + mov eax,[esi+4] // read all 4 pixels + bswap eax + mov edx,eax + + // 1st dword output { + shr eax,15 + and eax,0x1FE + mov cx,[ebx+eax] + ror cx,1 + shl ecx,16 + + mov eax,edx + shr eax,23 + and eax,0x1FE + mov cx,[ebx+eax] + ror cx,1 + + mov [edi],ecx + add edi,4 + // } + + // 2nd dword output { + mov eax,edx + shl eax,1 + and eax,0x1FE + mov cx,[ebx+eax] + ror cx,1 + shl ecx,16 + + shr edx,7 + and edx,0x1FE + mov cx,[ebx+edx] + ror cx,1 + + mov [edi],ecx + add edi,4 + // } + + // * copy + mov eax,[esi] // read all 4 pixels + bswap eax + mov edx,esi + add edx,8 + mov esi,[src] + sub edx,esi + and edx,0x7FF + add esi,edx + mov edx,eax + + // 1st dword output { + shr eax,15 + and eax,0x1FE + mov cx,[ebx+eax] + ror cx,1 + shl ecx,16 + + mov eax,edx + shr eax,23 + and eax,0x1FE + mov cx,[ebx+eax] + ror cx,1 + + mov [edi],ecx + add edi,4 + // } + + // 2nd dword output { + mov eax,edx + shl eax,1 + and eax,0x1FE + mov cx,[ebx+eax] + ror cx,1 + shl ecx,16 + + shr edx,7 + and edx,0x1FE + mov cx,[ebx+edx] + ror cx,1 + + mov [edi],ecx + add edi,4 + // } + // * + + pop ecx + + dec ecx + jnz x_loop_2 + + mov eax,esi + add eax,[line] + mov esi,[src] + sub eax,esi + and eax,0x7FF + add esi,eax + add edi,[ext] + + pop ecx + dec ecx + jnz y_loop + +end_y_loop: + pop edi + pop esi + pop ebx + mov esp, ebp + pop ebp + ret + } +} + +extern "C" void __declspec(naked) asmLoad8bIA8 (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext, wxUIntPtr pal) +{ + _asm { + push ebp + mov ebp, esp + push ebx + push esi + push edi + + mov ebx,[pal] + mov esi,[src] + mov edi,[dst] + mov ecx,[height] +y_loop: + push ecx + mov ecx,[wid_64] +x_loop: + push ecx + + mov eax,[esi] // read all 4 pixels + bswap eax + add esi,4 + mov edx,eax + + // 1st dword output { + shr eax,15 + and eax,0x1FE + mov cx,[ebx+eax] + ror cx,8 + shl ecx,16 + + mov eax,edx + shr eax,23 + and eax,0x1FE + mov cx,[ebx+eax] + ror cx,8 + + mov [edi],ecx + add edi,4 + // } + + // 2nd dword output { + mov eax,edx + shl eax,1 + and eax,0x1FE + mov cx,[ebx+eax] + ror cx,8 + shl ecx,16 + + shr edx,7 + and edx,0x1FE + mov cx,[ebx+edx] + ror cx,8 + + mov [edi],ecx + add edi,4 + // } + + // * copy + mov eax,[esi] // read all 4 pixels + bswap eax + add esi,4 + mov edx,eax + + // 1st dword output { + shr eax,15 + and eax,0x1FE + mov cx,[ebx+eax] + ror cx,8 + shl ecx,16 + + mov eax,edx + shr eax,23 + and eax,0x1FE + mov cx,[ebx+eax] + ror cx,8 + + mov [edi],ecx + add edi,4 + // } + + // 2nd dword output { + mov eax,edx + shl eax,1 + and eax,0x1FE + mov cx,[ebx+eax] + ror cx,8 + shl ecx,16 + + shr edx,7 + and edx,0x1FE + mov cx,[ebx+edx] + ror cx,8 + + mov [edi],ecx + add edi,4 + // } + // * + + pop ecx + + dec ecx + jnz x_loop + + pop ecx + dec ecx + jz near end_y_loop + push ecx + + add esi,[line] + add edi,[ext] + + mov ecx,[wid_64] +x_loop_2: + push ecx + + mov eax,[esi+4] // read all 4 pixels + bswap eax + mov edx,eax + + // 1st dword output { + shr eax,15 + and eax,0x1FE + mov cx,[ebx+eax] + ror cx,8 + shl ecx,16 + + mov eax,edx + shr eax,23 + and eax,0x1FE + mov cx,[ebx+eax] + ror cx,8 + + mov [edi],ecx + add edi,4 + // } + + // 2nd dword output { + mov eax,edx + shl eax,1 + and eax,0x1FE + mov cx,[ebx+eax] + ror cx,8 + shl ecx,16 + + shr edx,7 + and edx,0x1FE + mov cx,[ebx+edx] + ror cx,8 + + mov [edi],ecx + add edi,4 + // } + + // * copy + mov eax,[esi] // read all 4 pixels + bswap eax + add esi,8 + mov edx,eax + + // 1st dword output { + shr eax,15 + and eax,0x1FE + mov cx,[ebx+eax] + ror cx,8 + shl ecx,16 + + mov eax,edx + shr eax,23 + and eax,0x1FE + mov cx,[ebx+eax] + ror cx,8 + + mov [edi],ecx + add edi,4 + // } + + // 2nd dword output { + mov eax,edx + shl eax,1 + and eax,0x1FE + mov cx,[ebx+eax] + ror cx,8 + shl ecx,16 + + shr edx,7 + and edx,0x1FE + mov cx,[ebx+edx] + ror cx,8 + + mov [edi],ecx + add edi,4 + // } + // * + + pop ecx + + dec ecx + jnz x_loop_2 + + add esi,[line] + add edi,[ext] + + pop ecx + dec ecx + jnz y_loop + +end_y_loop: + pop edi + pop esi + pop ebx + mov esp, ebp + pop ebp + ret + } +} + +extern "C" void __declspec(naked) asmLoad8bIA4 (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext) +{ + _asm { + push ebp + mov ebp, esp + push ebx + push esi + push edi + + mov esi,[src] + mov edi,[dst] + mov ecx,[height] +y_loop: + push ecx + mov ecx,[wid_64] +x_loop: + mov eax,[esi] // read all 4 pixels + mov edx,eax + + shr eax,4 //all alpha + shl edx,4 + and eax,0x0F0F0F0F + and edx,0xF0F0F0F0 + add esi,4 + or eax,edx + + mov [edi],eax // save dword + add edi,4 + + mov eax,[esi] // read all 4 pixels + mov edx,eax + + shr eax,4 //all alpha + shl edx,4 + and eax,0x0F0F0F0F + and edx,0xF0F0F0F0 + add esi,4 + or eax,edx + + mov [edi],eax // save dword + add edi,4 + // * + + dec ecx + jnz x_loop + + pop ecx + dec ecx + jz end_y_loop + push ecx + + add esi,[line] + add edi,[ext] + + mov ecx,[wid_64] +x_loop_2: + mov eax,[esi+4] // read both pixels + mov edx,eax + + shr eax,4 //all alpha + shl edx,4 + and eax,0x0F0F0F0F + and edx,0xF0F0F0F0 + or eax,edx + + mov [edi],eax //save dword + add edi,4 + + mov eax,[esi] // read both pixels + add esi,8 + mov edx,eax + + shr eax,4 //all alpha + shl edx,4 + and eax,0x0F0F0F0F + and edx,0xF0F0F0F0 + or eax,edx + + mov [edi],eax //save dword + add edi,4 + // * + + dec ecx + jnz x_loop_2 + + add esi,[line] + add edi,[ext] + + pop ecx + dec ecx + jnz y_loop + +end_y_loop: + pop edi + pop esi + pop ebx + mov esp, ebp + pop ebp + ret + } +} + +extern "C" void __declspec(naked) asmLoad8bI (wxUIntPtr src, int dst, wxUIntPtr wid_64, int height, int line, int ext) +{ + _asm { + push ebp + mov ebp, esp + push ebx + push esi + push edi + + mov esi,[src] + mov edi,[dst] + mov ecx,[height] +y_loop: + push ecx + mov ecx,[wid_64] +x_loop: + mov eax,[esi] // read all 4 pixels + add esi,4 + + mov [edi],eax // save dword + add edi,4 + + mov eax,[esi] // read all 4 pixels + add esi,4 + + mov [edi],eax // save dword + add edi,4 + // * + + dec ecx + jnz x_loop + + pop ecx + dec ecx + jz end_y_loop + push ecx + + add esi,[line] + add edi,[ext] + + mov ecx,[wid_64] +x_loop_2: + mov eax,[esi+4] // read both pixels + + mov [edi],eax //save dword + add edi,4 + + mov eax,[esi] // read both pixels + add esi,8 + + mov [edi],eax //save dword + add edi,4 + // * + + dec ecx + jnz x_loop_2 + + add esi,[line] + add edi,[ext] + + pop ecx + dec ecx + jnz y_loop + +end_y_loop: + pop edi + pop esi + pop ebx + mov esp, ebp + pop ebp + ret + } +} //**************************************************************** // Size: 1, Format: 2 diff --git a/Source/Glide64/Texture.asm.cpp b/Source/Glide64/Texture.asm.cpp deleted file mode 100644 index fbfdabea1..000000000 --- a/Source/Glide64/Texture.asm.cpp +++ /dev/null @@ -1,3857 +0,0 @@ -/* -* Glide64 - Glide video plugin for Nintendo 64 emulators. -* -* This program is free software; you can redistribute it and/or modify -* it under the terms of the GNU General Public License as published by -* the Free Software Foundation; either version 2 of the License, or -* any later version. -* -* This program is distributed in the hope that it will be useful, -* but WITHOUT ANY WARRANTY; without even the implied warranty of -* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -* GNU General Public License for more details. -* -* You should have received a copy of the GNU General Public License -* along with this program; if not, write to the Free Software -* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -*/ - -/**************************************************************** - - Glide64 - Glide Plugin for Nintendo 64 emulators - Project started on December 29th, 2001 - - Authors: - Dave2001, original author, founded the project in 2001, left it in 2002 - Gugaman, joined the project in 2002, left it in 2002 - Sergey 'Gonetz' Lipski, joined the project in 2002, main author since fall of 2002 - Hiroshi 'KoolSmoky' Morii, joined the project in 2007 - -**************************************************************** - - To modify Glide64: - * Write your name and (optional)email, commented by your work, so I know who did it, and so that you can find which parts you modified when it comes time to send it to me. - * Do NOT send me the whole project or file that you modified. Take out your modified code sections, and tell me where to put them. If people sent the whole thing, I would have many different versions, but no idea how to combine them all. - -**************************************************************** -*/ - -#include "Gfx_1.3.h" - -/**************************************************************** - - ******** Textures load ******** - -****************************************************************/ - - -/***************************************************************** -4b textures load -*****************************************************************/ - - -/**************************************************************** - Size: 0, Format: 2 - 2009 ported to NASM - Sergey (Gonetz) Lipski - *****************************************************************/ -extern "C" void __declspec(naked) asmLoad4bCI (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext, wxUIntPtr pal) -{ - _asm { - push ebp - mov ebp, esp - push ebx - push esi - push edi - - mov ebx,[pal] - mov esi,[src] - mov edi,[dst] - mov ecx,[height] -y_loop: - push ecx - mov ecx,[wid_64] -x_loop: - push ecx - - mov eax,[esi] // read all 8 pixels - bswap eax - add esi,4 - mov edx,eax - - // 1st dword output { - shr eax,23 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,1 - shl ecx,16 - - mov eax,edx - shr eax,27 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,1 - - mov [edi],ecx - add edi,4 - // } - - // 2nd dword output { - mov eax,edx - shr eax,15 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,1 - shl ecx,16 - - mov eax,edx - shr eax,19 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,1 - - mov [edi],ecx - add edi,4 - // } - - // 3rd dword output { - mov eax,edx - shr eax,7 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,1 - shl ecx,16 - - mov eax,edx - shr eax,11 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,1 - - mov [edi],ecx - add edi,4 - // } - - // 4th dword output { - mov eax,edx - shl eax,1 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,1 - shl ecx,16 - - shr edx,3 - and edx,0x1E - mov cx,[ebx+edx] - ror cx,1 - - mov [edi],ecx - add edi,4 - // } - - // * copy - mov eax,[esi] // read all 8 pixels - bswap eax - add esi,4 - mov edx,eax - - // 1st dword output { - shr eax,23 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,1 - shl ecx,16 - - mov eax,edx - shr eax,27 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,1 - - mov [edi],ecx - add edi,4 - // } - - // 2nd dword output { - mov eax,edx - shr eax,15 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,1 - shl ecx,16 - - mov eax,edx - shr eax,19 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,1 - - mov [edi],ecx - add edi,4 - // } - - // 3rd dword output { - mov eax,edx - shr eax,7 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,1 - shl ecx,16 - - mov eax,edx - shr eax,11 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,1 - - mov [edi],ecx - add edi,4 - // } - - // 4th dword output { - mov eax,edx - shl eax,1 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,1 - shl ecx,16 - - shr edx,3 - and edx,0x1E - mov cx,[ebx+edx] - ror cx,1 - - mov [edi],ecx - add edi,4 - // } - // * - - pop ecx - - dec ecx - jnz x_loop - - pop ecx - dec ecx - jz near end_y_loop - push ecx - - mov eax,esi - add eax,[line] - mov esi,[src] - sub eax,esi - and eax,0x7FF - add esi,eax - add edi,[ext] - - mov ecx,[wid_64] - x_loop_2: - push ecx - - mov eax,[esi+4] // read all 8 pixels - bswap eax - mov edx,eax - - // 1st dword output { - shr eax,23 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,1 - shl ecx,16 - - mov eax,edx - shr eax,27 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,1 - - mov [edi],ecx - add edi,4 - // } - - // 2nd dword output { - mov eax,edx - shr eax,15 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,1 - shl ecx,16 - - mov eax,edx - shr eax,19 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,1 - - mov [edi],ecx - add edi,4 - // } - - // 3rd dword output { - mov eax,edx - shr eax,7 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,1 - shl ecx,16 - - mov eax,edx - shr eax,11 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,1 - - mov [edi],ecx - add edi,4 - // } - - // 4th dword output { - mov eax,edx - shl eax,1 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,1 - shl ecx,16 - - shr edx,3 - and edx,0x1E - mov cx,[ebx+edx] - ror cx,1 - - mov [edi],ecx - add edi,4 - // } - - // * copy - mov eax,[esi] // read all 8 pixels - bswap eax - mov edx,esi - add edx,8 - mov esi,[src] - sub edx,esi - and edx,0x7FF - add esi,edx - mov edx,eax - - // 1st dword output { - shr eax,23 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,1 - shl ecx,16 - - mov eax,edx - shr eax,27 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,1 - - mov [edi],ecx - add edi,4 - // } - - // 2nd dword output { - mov eax,edx - shr eax,15 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,1 - shl ecx,16 - - mov eax,edx - shr eax,19 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,1 - - mov [edi],ecx - add edi,4 - // } - - // 3rd dword output { - mov eax,edx - shr eax,7 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,1 - shl ecx,16 - - mov eax,edx - shr eax,11 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,1 - - mov [edi],ecx - add edi,4 - // } - - // 4th dword output { - mov eax,edx - shl eax,1 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,1 - shl ecx,16 - - shr edx,3 - and edx,0x1E - mov cx,[ebx+edx] - ror cx,1 - - mov [edi],ecx - add edi,4 - // } - // * - - pop ecx - - dec ecx - jnz x_loop_2 - - mov eax,esi - add eax,[line] - mov esi,[src] - sub eax,esi - and eax,0x7FF - add esi,eax - add edi,[ext] - - pop ecx - dec ecx - jnz y_loop - -end_y_loop: - pop edi - pop esi - pop ebx - mov esp, ebp - pop ebp - ret - } -} - -extern "C" void __declspec(naked) asmLoad4bIAPal (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext, wxUIntPtr pal) -{ - _asm { - push ebp - mov ebp, esp - push ebx - push esi - push edi - - mov ebx,[pal] - mov esi,[src] - mov edi,[dst] - mov ecx,[height] -y_loop: - push ecx - mov ecx,[wid_64] -x_loop: - push ecx - - mov eax,[esi] // read all 8 pixels - bswap eax - add esi,4 - mov edx,eax - - // 1st dword output { - shr eax,23 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,8 - shl ecx,16 - - mov eax,edx - shr eax,27 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,8 - - mov [edi],ecx - add edi,4 - // } - - // 2nd dword output { - mov eax,edx - shr eax,15 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,8 - shl ecx,16 - - mov eax,edx - shr eax,19 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,8 - - mov [edi],ecx - add edi,4 - // } - - // 3rd dword output { - mov eax,edx - shr eax,7 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,8 - shl ecx,16 - - mov eax,edx - shr eax,11 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,8 - - mov [edi],ecx - add edi,4 - // } - - // 4th dword output { - mov eax,edx - shl eax,1 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,8 - shl ecx,16 - - shr edx,3 - and edx,0x1E - mov cx,[ebx+edx] - ror cx,8 - - mov [edi],ecx - add edi,4 - // } - - // * copy - mov eax,[esi] // read all 8 pixels - bswap eax - add esi,4 - mov edx,eax - - // 1st dword output { - shr eax,23 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,8 - shl ecx,16 - - mov eax,edx - shr eax,27 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,8 - - mov [edi],ecx - add edi,4 - // } - - // 2nd dword output { - mov eax,edx - shr eax,15 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,8 - shl ecx,16 - - mov eax,edx - shr eax,19 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,8 - - mov [edi],ecx - add edi,4 - // } - - // 3rd dword output { - mov eax,edx - shr eax,7 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,8 - shl ecx,16 - - mov eax,edx - shr eax,11 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,8 - - mov [edi],ecx - add edi,4 - // } - - // 4th dword output { - mov eax,edx - shl eax,1 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,8 - shl ecx,16 - - shr edx,3 - and edx,0x1E - mov cx,[ebx+edx] - ror cx,8 - - mov [edi],ecx - add edi,4 - // } - // * - - pop ecx - - dec ecx - jnz x_loop - - pop ecx - dec ecx - jz near end_y_loop - push ecx - - mov eax,esi - add eax,[line] - mov esi,[src] - sub eax,esi - and eax,0x7FF - add esi,eax - add edi,[ext] - - mov ecx,[wid_64] -x_loop_2: - push ecx - - mov eax,[esi+4] // read all 8 pixels - bswap eax - mov edx,eax - - // 1st dword output { - shr eax,23 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,8 - shl ecx,16 - - mov eax,edx - shr eax,27 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,8 - - mov [edi],ecx - add edi,4 - // } - - // 2nd dword output { - mov eax,edx - shr eax,15 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,8 - shl ecx,16 - - mov eax,edx - shr eax,19 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,8 - - mov [edi],ecx - add edi,4 - // } - - // 3rd dword output { - mov eax,edx - shr eax,7 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,8 - shl ecx,16 - - mov eax,edx - shr eax,11 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,8 - - mov [edi],ecx - add edi,4 - // } - - // 4th dword output { - mov eax,edx - shl eax,1 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,8 - shl ecx,16 - - shr edx,3 - and edx,0x1E - mov cx,[ebx+edx] - ror cx,8 - - mov [edi],ecx - add edi,4 - // } - - // * copy - mov eax,[esi] // read all 8 pixels - bswap eax - mov edx,esi - add edx,8 - mov esi,[src] - sub edx,esi - and edx,0x7FF - add esi,edx - mov edx,eax - - // 1st dword output { - shr eax,23 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,8 - shl ecx,16 - - mov eax,edx - shr eax,27 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,8 - - mov [edi],ecx - add edi,4 - // } - - // 2nd dword output { - mov eax,edx - shr eax,15 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,8 - shl ecx,16 - - mov eax,edx - shr eax,19 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,8 - - mov [edi],ecx - add edi,4 - // } - - // 3rd dword output { - mov eax,edx - shr eax,7 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,8 - shl ecx,16 - - mov eax,edx - shr eax,11 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,8 - - mov [edi],ecx - add edi,4 - // } - - // 4th dword output { - mov eax,edx - shl eax,1 - and eax,0x1E - mov cx,[ebx+eax] - ror cx,8 - shl ecx,16 - - shr edx,3 - and edx,0x1E - mov cx,[ebx+edx] - ror cx,8 - - mov [edi],ecx - add edi,4 - // } - // * - - pop ecx - - dec ecx - jnz x_loop_2 - - mov eax,esi - add eax,[line] - mov esi,[src] - sub eax,esi - and eax,0x7FF - add esi,eax - add edi,[ext] - - pop ecx - dec ecx - jnz y_loop - -end_y_loop: - pop edi - pop esi - pop ebx - mov esp, ebp - pop ebp - ret - } -} - -/***************************************************************** - Size: 0, Format: 3 - - ** BY GUGAMAN ** - 2009 ported to NASM - Sergey (Gonetz) Lipski -*****************************************************************/ -extern "C" void __declspec(naked) asmLoad4bIA (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext) -{ - _asm { - push ebp - mov ebp, esp - push ebx - push esi - push edi - - mov esi,[src] - mov edi,[dst] - mov ecx,[height] -y_loop: - push ecx - mov ecx,[wid_64] -x_loop: - push ecx - - mov eax,[esi] // read all 8 pixels - bswap eax - add esi,4 - mov edx,eax - - // 1st dword { - xor ecx,ecx - - // pixel #1 - // IIIAxxxxxxxxxxxxxxxxxxxxxxxxxxxx - // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII - mov eax,edx - shr eax,24 //Alpha - and eax,0x00000010 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - mov eax,edx - shr eax,28 // Intensity - and eax,0x0000000E - or ecx,eax - shr eax,3 - or ecx,eax - - // pixel #2 - // xxxxIIIAxxxxxxxxxxxxxxxxxxxxxxxx - // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx - mov eax,edx - shr eax,12 //Alpha - and eax,0x00001000 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - mov eax,edx - shr eax,16 // Intensity - and eax,0x00000E00 - or ecx,eax - shr eax,3 - and eax,0x00000100 - or ecx,eax - - // pixel #3 - // xxxxxxxxIIIAxxxxxxxxxxxxxxxxxxxx - // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx - //Alpha - mov eax,edx - and eax,0x00100000 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - mov eax,edx - shr eax,4 // Intensity - and eax,0x000E0000 - or ecx,eax - shr eax,3 - and eax,0x00010000 - or ecx,eax - - // pixel #4 - // xxxxxxxxxxxxIIIAxxxxxxxxxxxxxxxx - // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx - mov eax,edx - shl eax,12 //Alpha - and eax,0x10000000 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - mov eax,edx - shl eax,8 // Intensity - and eax,0x0E000000 - or ecx,eax - shr eax,3 - and eax,0x01000000 - or ecx,eax - - - mov [edi],ecx - add edi,4 - // } - -// 2nd dword { - xor ecx,ecx - - // pixel #5 - // xxxxxxxxxxxxxxxxIIIAxxxxxxxxxxxx - // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII - mov eax,edx - shr eax,8 //Alpha - and eax,0x00000010 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - mov eax,edx - shr eax,12 // Intensity - and eax,0x0000000E - or ecx,eax - shr eax,3 - or ecx,eax - - // pixel #6 - // xxxxxxxxxxxxxxxxxxxxIIIAxxxxxxxx - // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx - //Alpha - mov eax,edx - shl eax,4 - and eax,0x00001000 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - mov eax,edx // Intensity - and eax,0x00000E00 - or ecx,eax - shr eax,3 - and eax,0x00000100 - or ecx,eax - - // pixel #7 - // xxxxxxxxxxxxxxxxxxxxxxxxIIIAxxxx - // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx - //Alpha - mov eax,edx - shl eax,16 - and eax,0x00100000 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - mov eax,edx - shl eax,12 // Intensity - and eax,0x000E0000 - or ecx,eax - shr eax,3 - and eax,0x00010000 - or ecx,eax - - // pixel #8 - // xxxxxxxxxxxxxxxxxxxxxxxxxxxxIIIA - // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx - mov eax,edx - shl eax,28 //Alpha - and eax,0x10000000 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - mov eax,edx - shl eax,24 // Intensity - and eax,0x0E000000 - or ecx,eax - shr eax,3 - and eax,0x01000000 - or ecx,eax - - mov [edi],ecx - add edi,4 - // } - - // * copy - mov eax,[esi] // read all 8 pixels - bswap eax - add esi,4 - mov edx,eax - - // 1st dword { - xor ecx,ecx - - // pixel #1 - // IIIAxxxxxxxxxxxxxxxxxxxxxxxxxxxx - // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII - mov eax,edx - shr eax,24 //Alpha - and eax,0x00000010 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - mov eax,edx - shr eax,28 // Intensity - and eax,0x0000000E - or ecx,eax - shr eax,3 - or ecx,eax - - // pixel #2 - // xxxxIIIAxxxxxxxxxxxxxxxxxxxxxxxx - // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx - mov eax,edx - shr eax,12 //Alpha - and eax,0x00001000 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - mov eax,edx - shr eax,16 // Intensity - and eax,0x00000E00 - or ecx,eax - shr eax,3 - and eax,0x00000100 - or ecx,eax - - // pixel #3 - // xxxxxxxxIIIAxxxxxxxxxxxxxxxxxxxx - // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx - //Alpha - mov eax,edx - and eax,0x00100000 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - mov eax,edx - shr eax,4 // Intensity - and eax,0x000E0000 - or ecx,eax - shr eax,3 - and eax,0x00010000 - or ecx,eax - - // pixel #4 - // xxxxxxxxxxxxIIIAxxxxxxxxxxxxxxxx - // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx - mov eax,edx - shl eax,12 //Alpha - and eax,0x10000000 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - mov eax,edx - shl eax,8 // Intensity - and eax,0x0E000000 - or ecx,eax - shr eax,3 - and eax,0x01000000 - or ecx,eax - - - mov [edi],ecx - add edi,4 - // } - -// 2nd dword { - xor ecx,ecx - - // pixel #5 - // xxxxxxxxxxxxxxxxIIIAxxxxxxxxxxxx - // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII - mov eax,edx - shr eax,8 //Alpha - and eax,0x00000010 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - mov eax,edx - shr eax,12 // Intensity - and eax,0x0000000E - or ecx,eax - shr eax,3 - or ecx,eax - - // pixel #6 - // xxxxxxxxxxxxxxxxxxxxIIIAxxxxxxxx - // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx - //Alpha - mov eax,edx - shl eax,4 - and eax,0x00001000 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - mov eax,edx // Intensity - and eax,0x00000E00 - or ecx,eax - shr eax,3 - and eax,0x00000100 - or ecx,eax - - // pixel #7 - // xxxxxxxxxxxxxxxxxxxxxxxxIIIAxxxx - // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx - //Alpha - mov eax,edx - shl eax,16 - and eax,0x00100000 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - mov eax,edx - shl eax,12 // Intensity - and eax,0x000E0000 - or ecx,eax - shr eax,3 - and eax,0x00010000 - or ecx,eax - - // pixel #8 - // xxxxxxxxxxxxxxxxxxxxxxxxxxxxIIIA - // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx - mov eax,edx - shl eax,28 //Alpha - and eax,0x10000000 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - mov eax,edx - shl eax,24 // Intensity - and eax,0x0E000000 - or ecx,eax - shr eax,3 - and eax,0x01000000 - or ecx,eax - - mov [edi],ecx - add edi,4 - // } - - // * - - pop ecx - dec ecx - jnz x_loop - - pop ecx - dec ecx - jz near end_y_loop - push ecx - - add esi,[line] - add edi,[ext] - - mov ecx,[wid_64] -x_loop_2: - push ecx - - mov eax,[esi+4] // read all 8 pixels - bswap eax - mov edx,eax - - // 1st dword { - xor ecx,ecx - - // pixel #1 - // IIIAxxxxxxxxxxxxxxxxxxxxxxxxxxxx - // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII - mov eax,edx - shr eax,24 //Alpha - and eax,0x00000010 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - mov eax,edx - shr eax,28 // Intensity - and eax,0x0000000E - or ecx,eax - shr eax,3 - or ecx,eax - - // pixel #2 - // xxxxIIIAxxxxxxxxxxxxxxxxxxxxxxxx - // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx - mov eax,edx - shr eax,12 //Alpha - and eax,0x00001000 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - mov eax,edx - shr eax,16 // Intensity - and eax,0x00000E00 - or ecx,eax - shr eax,3 - and eax,0x00000100 - or ecx,eax - - // pixel #3 - // xxxxxxxxIIIAxxxxxxxxxxxxxxxxxxxx - // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx - //Alpha - mov eax,edx - and eax,0x00100000 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - mov eax,edx - shr eax,4 // Intensity - and eax,0x000E0000 - or ecx,eax - shr eax,3 - and eax,0x00010000 - or ecx,eax - - // pixel #4 - // xxxxxxxxxxxxIIIAxxxxxxxxxxxxxxxx - // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx - mov eax,edx - shl eax,12 //Alpha - and eax,0x10000000 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - mov eax,edx - shl eax,8 // Intensity - and eax,0x0E000000 - or ecx,eax - shr eax,3 - and eax,0x01000000 - or ecx,eax - - - mov [edi],ecx - add edi,4 - // } - -// 2nd dword { - xor ecx,ecx - - // pixel #5 - // xxxxxxxxxxxxxxxxIIIAxxxxxxxxxxxx - // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII - mov eax,edx - shr eax,8 //Alpha - and eax,0x00000010 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - mov eax,edx - shr eax,12 // Intensity - and eax,0x0000000E - or ecx,eax - shr eax,3 - or ecx,eax - - // pixel #6 - // xxxxxxxxxxxxxxxxxxxxIIIAxxxxxxxx - // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx - //Alpha - mov eax,edx - shl eax,4 - and eax,0x00001000 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - mov eax,edx // Intensity - and eax,0x00000E00 - or ecx,eax - shr eax,3 - and eax,0x00000100 - or ecx,eax - - // pixel #7 - // xxxxxxxxxxxxxxxxxxxxxxxxIIIAxxxx - // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx - //Alpha - mov eax,edx - shl eax,16 - and eax,0x00100000 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - mov eax,edx - shl eax,12 // Intensity - and eax,0x000E0000 - or ecx,eax - shr eax,3 - and eax,0x00010000 - or ecx,eax - - // pixel #8 - // xxxxxxxxxxxxxxxxxxxxxxxxxxxxIIIA - // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx - mov eax,edx - shl eax,28 //Alpha - and eax,0x10000000 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - mov eax,edx - shl eax,24 // Intensity - and eax,0x0E000000 - or ecx,eax - shr eax,3 - and eax,0x01000000 - or ecx,eax - - mov [edi],ecx - add edi,4 - // } - - // * copy - mov eax,[esi] // read all 8 pixels - bswap eax - add esi,8 - mov edx,eax - -// 1st dword { - xor ecx,ecx - - // pixel #1 - // IIIAxxxxxxxxxxxxxxxxxxxxxxxxxxxx - // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII - mov eax,edx - shr eax,24 //Alpha - and eax,0x00000010 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - mov eax,edx - shr eax,28 // Intensity - and eax,0x0000000E - or ecx,eax - shr eax,3 - or ecx,eax - - // pixel #2 - // xxxxIIIAxxxxxxxxxxxxxxxxxxxxxxxx - // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx - mov eax,edx - shr eax,12 //Alpha - and eax,0x00001000 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - mov eax,edx - shr eax,16 // Intensity - and eax,0x00000E00 - or ecx,eax - shr eax,3 - and eax,0x00000100 - or ecx,eax - - // pixel #3 - // xxxxxxxxIIIAxxxxxxxxxxxxxxxxxxxx - // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx - //Alpha - mov eax,edx - and eax,0x00100000 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - mov eax,edx - shr eax,4 // Intensity - and eax,0x000E0000 - or ecx,eax - shr eax,3 - and eax,0x00010000 - or ecx,eax - - // pixel #4 - // xxxxxxxxxxxxIIIAxxxxxxxxxxxxxxxx - // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx - mov eax,edx - shl eax,12 //Alpha - and eax,0x10000000 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - mov eax,edx - shl eax,8 // Intensity - and eax,0x0E000000 - or ecx,eax - shr eax,3 - and eax,0x01000000 - or ecx,eax - - - mov [edi],ecx - add edi,4 - // } - -// 2nd dword { - xor ecx,ecx - - // pixel #5 - // xxxxxxxxxxxxxxxxIIIAxxxxxxxxxxxx - // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII - mov eax,edx - shr eax,8 //Alpha - and eax,0x00000010 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - mov eax,edx - shr eax,12 // Intensity - and eax,0x0000000E - or ecx,eax - shr eax,3 - or ecx,eax - - // pixel #6 - // xxxxxxxxxxxxxxxxxxxxIIIAxxxxxxxx - // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx - //Alpha - mov eax,edx - shl eax,4 - and eax,0x00001000 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - mov eax,edx // Intensity - and eax,0x00000E00 - or ecx,eax - shr eax,3 - and eax,0x00000100 - or ecx,eax - - // pixel #7 - // xxxxxxxxxxxxxxxxxxxxxxxxIIIAxxxx - // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx - //Alpha - mov eax,edx - shl eax,16 - and eax,0x00100000 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - mov eax,edx - shl eax,12 // Intensity - and eax,0x000E0000 - or ecx,eax - shr eax,3 - and eax,0x00010000 - or ecx,eax - - // pixel #8 - // xxxxxxxxxxxxxxxxxxxxxxxxxxxxIIIA - // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx - mov eax,edx - shl eax,28 //Alpha - and eax,0x10000000 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - shl eax,1 - or ecx,eax - mov eax,edx - shl eax,24 // Intensity - and eax,0x0E000000 - or ecx,eax - shr eax,3 - and eax,0x01000000 - or ecx,eax - - mov [edi],ecx - add edi,4 - // } - // * - - pop ecx - dec ecx - jnz x_loop_2 - - add esi,[line] - add edi,[ext] - - pop ecx - dec ecx - jnz y_loop - -end_y_loop: - pop edi - pop esi - pop ebx - mov esp, ebp - pop ebp - ret - } -} - -//**************************************************************** -// Size: 0, Format: 4 -// 2009 ported to NASM - Sergey (Gonetz) Lipski - -extern "C" void __declspec(naked) asmLoad4bI (wxUIntPtr src, int dst, wxUIntPtr wid_64, int height, int line, int ext) -{ - _asm { - push ebp - mov ebp, esp - push ebx - push esi - push edi - - mov esi,[src] - mov edi,[dst] - mov ecx,[height] -y_loop: - push ecx - mov ecx,[wid_64] -x_loop: - push ecx - - mov eax,[esi] // read all 8 pixels - bswap eax - add esi,4 - mov edx,eax - - // 1st dword { - xor ecx,ecx - shr eax,28 // 0xF0000000 -> 0x0000000F - or ecx,eax - shl eax,4 - or ecx,eax - - mov eax,edx // 0x0F000000 -> 0x00000F00 - shr eax,16 - and eax,0x00000F00 - or ecx,eax - shl eax,4 - or ecx,eax - - mov eax,edx - shr eax,4 // 0x00F00000 -> 0x000F0000 - and eax,0x000F0000 - or ecx,eax - shl eax,4 - or ecx,eax - - mov eax,edx - shl eax,8 // 0x000F0000 -> 0x0F000000 - and eax,0x0F000000 - or ecx,eax - shl eax,4 - or ecx,eax - - mov [edi],ecx - add edi,4 - // } - - // 2nd dword { - xor ecx,ecx - mov eax,edx - shr eax,12 // 0x0000F000 -> 0x0000000F - and eax,0x0000000F - or ecx,eax - shl eax,4 - or ecx,eax - - mov eax,edx // 0x00000F00 -> 0x00000F00 - and eax,0x00000F00 - or ecx,eax - shl eax,4 - or ecx,eax - - mov eax,edx - shl eax,12 // 0x000000F0 -> 0x000F0000 - and eax,0x000F0000 - or ecx,eax - shl eax,4 - or ecx,eax - - shl edx,24 // 0x0000000F -> 0x0F000000 - and edx,0x0F000000 - or ecx,edx - shl edx,4 - or ecx,edx - - mov [edi],ecx - add edi,4 - // } - - // * copy - mov eax,[esi] // read all 8 pixels - bswap eax - add esi,4 - mov edx,eax - - // 1st dword { - xor ecx,ecx - shr eax,28 // 0xF0000000 -> 0x0000000F - or ecx,eax - shl eax,4 - or ecx,eax - - mov eax,edx // 0x0F000000 -> 0x00000F00 - shr eax,16 - and eax,0x00000F00 - or ecx,eax - shl eax,4 - or ecx,eax - - mov eax,edx - shr eax,4 // 0x00F00000 -> 0x000F0000 - and eax,0x000F0000 - or ecx,eax - shl eax,4 - or ecx,eax - - mov eax,edx - shl eax,8 // 0x000F0000 -> 0x0F000000 - and eax,0x0F000000 - or ecx,eax - shl eax,4 - or ecx,eax - - mov [edi],ecx - add edi,4 - // } - - // 2nd dword { - xor ecx,ecx - mov eax,edx - shr eax,12 // 0x0000F000 -> 0x0000000F - and eax,0x0000000F - or ecx,eax - shl eax,4 - or ecx,eax - - mov eax,edx // 0x00000F00 -> 0x00000F00 - and eax,0x00000F00 - or ecx,eax - shl eax,4 - or ecx,eax - - mov eax,edx - shl eax,12 // 0x000000F0 -> 0x000F0000 - and eax,0x000F0000 - or ecx,eax - shl eax,4 - or ecx,eax - - shl edx,24 // 0x0000000F -> 0x0F000000 - and edx,0x0F000000 - or ecx,edx - shl edx,4 - or ecx,edx - - mov [edi],ecx - add edi,4 - // } - // * - - pop ecx - dec ecx - jnz x_loop - - pop ecx - dec ecx - jz near end_y_loop - push ecx - - add esi,[line] - add edi,[ext] - - mov ecx,[wid_64] -x_loop_2: - push ecx - - mov eax,[esi+4] // read all 8 pixels - bswap eax - mov edx,eax - - // 1st dword { - xor ecx,ecx - shr eax,28 // 0xF0000000 -> 0x0000000F - or ecx,eax - shl eax,4 - or ecx,eax - - mov eax,edx // 0x0F000000 -> 0x00000F00 - shr eax,16 - and eax,0x00000F00 - or ecx,eax - shl eax,4 - or ecx,eax - - mov eax,edx - shr eax,4 // 0x00F00000 -> 0x000F0000 - and eax,0x000F0000 - or ecx,eax - shl eax,4 - or ecx,eax - - mov eax,edx - shl eax,8 // 0x000F0000 -> 0x0F000000 - and eax,0x0F000000 - or ecx,eax - shl eax,4 - or ecx,eax - - mov [edi],ecx - add edi,4 - // } - - // 2nd dword { - xor ecx,ecx - mov eax,edx - shr eax,12 // 0x0000F000 -> 0x0000000F - and eax,0x0000000F - or ecx,eax - shl eax,4 - or ecx,eax - - mov eax,edx // 0x00000F00 -> 0x00000F00 - and eax,0x00000F00 - or ecx,eax - shl eax,4 - or ecx,eax - - mov eax,edx - shl eax,12 // 0x000000F0 -> 0x000F0000 - and eax,0x000F0000 - or ecx,eax - shl eax,4 - or ecx,eax - - shl edx,24 // 0x0000000F -> 0x0F000000 - and edx,0x0F000000 - or ecx,edx - shl edx,4 - or ecx,edx - - mov [edi],ecx - add edi,4 - // } - - // * copy - mov eax,[esi] // read all 8 pixels - bswap eax - add esi,8 - mov edx,eax - - // 1st dword { - xor ecx,ecx - shr eax,28 // 0xF0000000 -> 0x0000000F - or ecx,eax - shl eax,4 - or ecx,eax - - mov eax,edx // 0x0F000000 -> 0x00000F00 - shr eax,16 - and eax,0x00000F00 - or ecx,eax - shl eax,4 - or ecx,eax - - mov eax,edx - shr eax,4 // 0x00F00000 -> 0x000F0000 - and eax,0x000F0000 - or ecx,eax - shl eax,4 - or ecx,eax - - mov eax,edx - shl eax,8 // 0x000F0000 -> 0x0F000000 - and eax,0x0F000000 - or ecx,eax - shl eax,4 - or ecx,eax - - mov [edi],ecx - add edi,4 - // } - - // 2nd dword { - xor ecx,ecx - mov eax,edx - shr eax,12 // 0x0000F000 -> 0x0000000F - and eax,0x0000000F - or ecx,eax - shl eax,4 - or ecx,eax - - mov eax,edx // 0x00000F00 -> 0x00000F00 - and eax,0x00000F00 - or ecx,eax - shl eax,4 - or ecx,eax - - mov eax,edx - shl eax,12 // 0x000000F0 -> 0x000F0000 - and eax,0x000F0000 - or ecx,eax - shl eax,4 - or ecx,eax - - shl edx,24 // 0x0000000F -> 0x0F000000 - and edx,0x0F000000 - or ecx,edx - shl edx,4 - or ecx,edx - - mov [edi],ecx - add edi,4 - // } - // * - - pop ecx - dec ecx - jnz x_loop_2 - - add esi,[line] - add edi,[ext] - - pop ecx - dec ecx - jnz y_loop - -end_y_loop: - pop edi - pop esi - pop ebx - mov esp, ebp - pop ebp - ret - } -} -//**************************************************************** -//8b textures load -//**************************************************************** - -//**************************************************************** -// Size: 1, Format: 2 -// -// 2008.03.29 cleaned up - H.Morii -// 2009 ported to NASM - Sergey (Gonetz) Lipski - -extern "C" void __declspec(naked) asmLoad8bCI (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext, wxUIntPtr pal) -{ - _asm { - push ebp - mov ebp, esp - push ebx - push esi - push edi - - mov ebx,[pal] - mov esi,[src] - mov edi,[dst] - mov ecx,[height] -y_loop: - push ecx - mov ecx,[wid_64] -x_loop: - push ecx - - mov eax,[esi] // read all 4 pixels - bswap eax - add esi,4 - mov edx,eax - - // 1st dword output { - shr eax,15 - and eax,0x1FE - mov cx,[ebx+eax] - ror cx,1 - shl ecx,16 - - mov eax,edx - shr eax,23 - and eax,0x1FE - mov cx,[ebx+eax] - ror cx,1 - - mov [edi],ecx - add edi,4 - // } - - // 2nd dword output { - mov eax,edx - shl eax,1 - and eax,0x1FE - mov cx,[ebx+eax] - ror cx,1 - shl ecx,16 - - shr edx,7 - and edx,0x1FE - mov cx,[ebx+edx] - ror cx,1 - - mov [edi],ecx - add edi,4 - // } - - // * copy - mov eax,[esi] // read all 4 pixels - bswap eax - add esi,4 - mov edx,eax - - // 1st dword output { - shr eax,15 - and eax,0x1FE - mov cx,[ebx+eax] - ror cx,1 - shl ecx,16 - - mov eax,edx - shr eax,23 - and eax,0x1FE - mov cx,[ebx+eax] - ror cx,1 - - mov [edi],ecx - add edi,4 - // } - - // 2nd dword output { - mov eax,edx - shl eax,1 - and eax,0x1FE - mov cx,[ebx+eax] - ror cx,1 - shl ecx,16 - - shr edx,7 - and edx,0x1FE - mov cx,[ebx+edx] - ror cx,1 - - mov [edi],ecx - add edi,4 - // } - // * - - pop ecx - - dec ecx - jnz x_loop - - pop ecx - dec ecx - jz near end_y_loop - push ecx - - mov eax,esi - add eax,[line] - mov esi,[src] - sub eax,esi - and eax,0x7FF - add esi,eax - add edi,[ext] - - mov ecx,[wid_64] -x_loop_2: - push ecx - - mov eax,[esi+4] // read all 4 pixels - bswap eax - mov edx,eax - - // 1st dword output { - shr eax,15 - and eax,0x1FE - mov cx,[ebx+eax] - ror cx,1 - shl ecx,16 - - mov eax,edx - shr eax,23 - and eax,0x1FE - mov cx,[ebx+eax] - ror cx,1 - - mov [edi],ecx - add edi,4 - // } - - // 2nd dword output { - mov eax,edx - shl eax,1 - and eax,0x1FE - mov cx,[ebx+eax] - ror cx,1 - shl ecx,16 - - shr edx,7 - and edx,0x1FE - mov cx,[ebx+edx] - ror cx,1 - - mov [edi],ecx - add edi,4 - // } - - // * copy - mov eax,[esi] // read all 4 pixels - bswap eax - mov edx,esi - add edx,8 - mov esi,[src] - sub edx,esi - and edx,0x7FF - add esi,edx - mov edx,eax - - // 1st dword output { - shr eax,15 - and eax,0x1FE - mov cx,[ebx+eax] - ror cx,1 - shl ecx,16 - - mov eax,edx - shr eax,23 - and eax,0x1FE - mov cx,[ebx+eax] - ror cx,1 - - mov [edi],ecx - add edi,4 - // } - - // 2nd dword output { - mov eax,edx - shl eax,1 - and eax,0x1FE - mov cx,[ebx+eax] - ror cx,1 - shl ecx,16 - - shr edx,7 - and edx,0x1FE - mov cx,[ebx+edx] - ror cx,1 - - mov [edi],ecx - add edi,4 - // } - // * - - pop ecx - - dec ecx - jnz x_loop_2 - - mov eax,esi - add eax,[line] - mov esi,[src] - sub eax,esi - and eax,0x7FF - add esi,eax - add edi,[ext] - - pop ecx - dec ecx - jnz y_loop - -end_y_loop: - pop edi - pop esi - pop ebx - mov esp, ebp - pop ebp - ret - } -} - -extern "C" void __declspec(naked) asmLoad8bIA8 (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext, wxUIntPtr pal) -{ - _asm { - push ebp - mov ebp, esp - push ebx - push esi - push edi - - mov ebx,[pal] - mov esi,[src] - mov edi,[dst] - mov ecx,[height] -y_loop: - push ecx - mov ecx,[wid_64] -x_loop: - push ecx - - mov eax,[esi] // read all 4 pixels - bswap eax - add esi,4 - mov edx,eax - - // 1st dword output { - shr eax,15 - and eax,0x1FE - mov cx,[ebx+eax] - ror cx,8 - shl ecx,16 - - mov eax,edx - shr eax,23 - and eax,0x1FE - mov cx,[ebx+eax] - ror cx,8 - - mov [edi],ecx - add edi,4 - // } - - // 2nd dword output { - mov eax,edx - shl eax,1 - and eax,0x1FE - mov cx,[ebx+eax] - ror cx,8 - shl ecx,16 - - shr edx,7 - and edx,0x1FE - mov cx,[ebx+edx] - ror cx,8 - - mov [edi],ecx - add edi,4 - // } - - // * copy - mov eax,[esi] // read all 4 pixels - bswap eax - add esi,4 - mov edx,eax - - // 1st dword output { - shr eax,15 - and eax,0x1FE - mov cx,[ebx+eax] - ror cx,8 - shl ecx,16 - - mov eax,edx - shr eax,23 - and eax,0x1FE - mov cx,[ebx+eax] - ror cx,8 - - mov [edi],ecx - add edi,4 - // } - - // 2nd dword output { - mov eax,edx - shl eax,1 - and eax,0x1FE - mov cx,[ebx+eax] - ror cx,8 - shl ecx,16 - - shr edx,7 - and edx,0x1FE - mov cx,[ebx+edx] - ror cx,8 - - mov [edi],ecx - add edi,4 - // } - // * - - pop ecx - - dec ecx - jnz x_loop - - pop ecx - dec ecx - jz near end_y_loop - push ecx - - add esi,[line] - add edi,[ext] - - mov ecx,[wid_64] -x_loop_2: - push ecx - - mov eax,[esi+4] // read all 4 pixels - bswap eax - mov edx,eax - - // 1st dword output { - shr eax,15 - and eax,0x1FE - mov cx,[ebx+eax] - ror cx,8 - shl ecx,16 - - mov eax,edx - shr eax,23 - and eax,0x1FE - mov cx,[ebx+eax] - ror cx,8 - - mov [edi],ecx - add edi,4 - // } - - // 2nd dword output { - mov eax,edx - shl eax,1 - and eax,0x1FE - mov cx,[ebx+eax] - ror cx,8 - shl ecx,16 - - shr edx,7 - and edx,0x1FE - mov cx,[ebx+edx] - ror cx,8 - - mov [edi],ecx - add edi,4 - // } - - // * copy - mov eax,[esi] // read all 4 pixels - bswap eax - add esi,8 - mov edx,eax - - // 1st dword output { - shr eax,15 - and eax,0x1FE - mov cx,[ebx+eax] - ror cx,8 - shl ecx,16 - - mov eax,edx - shr eax,23 - and eax,0x1FE - mov cx,[ebx+eax] - ror cx,8 - - mov [edi],ecx - add edi,4 - // } - - // 2nd dword output { - mov eax,edx - shl eax,1 - and eax,0x1FE - mov cx,[ebx+eax] - ror cx,8 - shl ecx,16 - - shr edx,7 - and edx,0x1FE - mov cx,[ebx+edx] - ror cx,8 - - mov [edi],ecx - add edi,4 - // } - // * - - pop ecx - - dec ecx - jnz x_loop_2 - - add esi,[line] - add edi,[ext] - - pop ecx - dec ecx - jnz y_loop - -end_y_loop: - pop edi - pop esi - pop ebx - mov esp, ebp - pop ebp - ret - } -} - -//**************************************************************** -// Size: 1, Format: 3 -// -// ** by Gugaman ** -// -// 2008.03.29 cleaned up - H.Morii -// 2009 ported to NASM - Sergey (Gonetz) Lipski - -extern "C" void __declspec(naked) asmLoad8bIA4 (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext) -{ - _asm { - push ebp - mov ebp, esp - push ebx - push esi - push edi - - mov esi,[src] - mov edi,[dst] - mov ecx,[height] -y_loop: - push ecx - mov ecx,[wid_64] -x_loop: - mov eax,[esi] // read all 4 pixels - mov edx,eax - - shr eax,4 //all alpha - shl edx,4 - and eax,0x0F0F0F0F - and edx,0xF0F0F0F0 - add esi,4 - or eax,edx - - mov [edi],eax // save dword - add edi,4 - - mov eax,[esi] // read all 4 pixels - mov edx,eax - - shr eax,4 //all alpha - shl edx,4 - and eax,0x0F0F0F0F - and edx,0xF0F0F0F0 - add esi,4 - or eax,edx - - mov [edi],eax // save dword - add edi,4 - // * - - dec ecx - jnz x_loop - - pop ecx - dec ecx - jz end_y_loop - push ecx - - add esi,[line] - add edi,[ext] - - mov ecx,[wid_64] -x_loop_2: - mov eax,[esi+4] // read both pixels - mov edx,eax - - shr eax,4 //all alpha - shl edx,4 - and eax,0x0F0F0F0F - and edx,0xF0F0F0F0 - or eax,edx - - mov [edi],eax //save dword - add edi,4 - - mov eax,[esi] // read both pixels - add esi,8 - mov edx,eax - - shr eax,4 //all alpha - shl edx,4 - and eax,0x0F0F0F0F - and edx,0xF0F0F0F0 - or eax,edx - - mov [edi],eax //save dword - add edi,4 - // * - - dec ecx - jnz x_loop_2 - - add esi,[line] - add edi,[ext] - - pop ecx - dec ecx - jnz y_loop - -end_y_loop: - pop edi - pop esi - pop ebx - mov esp, ebp - pop ebp - ret - } -} - -//**************************************************************** -// Size: 1, Format: 4 -// -// ** by Gugaman ** -// 2009 ported to NASM - Sergey (Gonetz) Lipski - -extern "C" void __declspec(naked) asmLoad8bI (wxUIntPtr src, int dst, wxUIntPtr wid_64, int height, int line, int ext) -{ - _asm { - push ebp - mov ebp, esp - push ebx - push esi - push edi - - mov esi,[src] - mov edi,[dst] - mov ecx,[height] -y_loop: - push ecx - mov ecx,[wid_64] -x_loop: - mov eax,[esi] // read all 4 pixels - add esi,4 - - mov [edi],eax // save dword - add edi,4 - - mov eax,[esi] // read all 4 pixels - add esi,4 - - mov [edi],eax // save dword - add edi,4 - // * - - dec ecx - jnz x_loop - - pop ecx - dec ecx - jz end_y_loop - push ecx - - add esi,[line] - add edi,[ext] - - mov ecx,[wid_64] -x_loop_2: - mov eax,[esi+4] // read both pixels - - mov [edi],eax //save dword - add edi,4 - - mov eax,[esi] // read both pixels - add esi,8 - - mov [edi],eax //save dword - add edi,4 - // * - - dec ecx - jnz x_loop_2 - - add esi,[line] - add edi,[ext] - - pop ecx - dec ecx - jnz y_loop - -end_y_loop: - pop edi - pop esi - pop ebx - mov esp, ebp - pop ebp - ret - } -} - - -//**************************************************************** -//16b textures load -//**************************************************************** - -//**************************************************************** -// Size: 2, Format: 0 -// -// 2008.03.29 cleaned up - H.Morii -// 2009 ported to NASM - Sergey (Gonetz) Lipski - -extern "C" void __declspec(naked) asmLoad16bRGBA (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext) -{ - _asm { - align 4 - push ebp - mov ebp,esp - push ebx - push esi - push edi - - mov esi,[src] - mov edi,[dst] - mov ecx,[height] -y_loop: - push ecx - mov ecx,[wid_64] -x_loop: - mov eax,[esi] // read both pixels - mov ebx,[esi+4] // read both pixels - bswap eax - bswap ebx - - ror ax,1 - ror bx,1 - ror eax,16 - ror ebx,16 - ror ax,1 - ror bx,1 - - mov [edi],eax - mov [edi+4],ebx - add esi,8 - add edi,8 - - dec ecx - jnz x_loop - - pop ecx - dec ecx - jz end_y_loop - push ecx - - mov eax,esi - add eax,[line] - mov esi,[src] - sub eax, esi - and eax, 0xFFF - add esi, eax - add edi,[ext] - - mov ecx,[wid_64] -x_loop_2: - mov eax,[esi+4] // read both pixels - mov ebx,[esi] // read both pixels - bswap eax - bswap ebx - - ror ax,1 - ror bx,1 - ror eax,16 - ror ebx,16 - ror ax,1 - ror bx,1 - - mov [edi],eax - mov [edi+4],ebx - add esi,8 - add edi,8 - - dec ecx - jnz x_loop_2 - - mov eax,esi - add eax,[line] - mov esi,[src] - sub eax, esi - and eax, 0xFFF - add esi, eax - add edi,[ext] - - pop ecx - dec ecx - jnz y_loop - -end_y_loop: - pop edi - pop esi - pop ebx - mov esp, ebp - pop ebp - ret - } -} - - - -//**************************************************************** -// Size: 2, Format: 3 -// -// ** by Gugaman/Dave2001 ** -// -// 2008.03.29 cleaned up - H.Morii -// 2009 ported to NASM - Sergey (Gonetz) Lipski - -extern "C" void __declspec(naked) asmLoad16bIA (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext) -{ - _asm { - ALIGN 4 - - push ebp - mov ebp, esp - push ebx - push esi - push edi - - mov esi,[src] - mov edi,[dst] - mov ecx,[height] -y_loop: - push ecx - mov ecx,[wid_64] -x_loop: - mov eax,[esi] // read both pixels - mov ebx,[esi+4] // read both pixels - mov [edi],eax - mov [edi+4],ebx - add esi,8 - add edi,8 - - dec ecx - jnz x_loop - - pop ecx - dec ecx - jz end_y_loop - push ecx - - add esi,[line] - add edi,[ext] - - mov ecx,[wid_64] -x_loop_2: - mov eax,[esi+4] // read both pixels - mov ebx,[esi] // read both pixels - mov [edi],eax - mov [edi+4],ebx - add esi,8 - add edi,8 - - dec ecx - jnz x_loop_2 - - add esi,[line] - add edi,[ext] - - pop ecx - dec ecx - jnz y_loop - -end_y_loop: - pop edi - pop esi - pop ebx - mov esp, ebp - pop ebp - ret - } -} - -//**************************************************************** -// -// ******** Textures mirror/clamp/wrap ******** -// -//**************************************************************** - -//**************************************************************** -//8b textures mirror/clamp/wrap -//**************************************************************** - -extern "C" void __declspec(naked) asmMirror8bS (int tex, int start, int width, int height, int mask, int line, int full, int count) -{ - _asm{ - ALIGN 4 - - push ebp - mov ebp, esp - push ebx - push esi - push edi - - mov edi,[start] - mov ecx,[height] -loop_y: - - xor edx,edx -loop_x: - mov esi,[tex] - mov ebx,[width] - add ebx,edx - and ebx,[width] - jnz is_mirrored - - mov eax,edx - and eax,[mask] - add esi,eax - mov al,[esi] - mov [edi],al - inc edi - jmp end_mirror_check -is_mirrored: - add esi,[mask] - mov eax,edx - and eax,[mask] - sub esi,eax - mov al,[esi] - mov [edi],al - inc edi -end_mirror_check: - - inc edx - cmp edx,[count] - jne loop_x - - add edi,[line] - mov eax,[tex] - add eax,[full] - mov [tex],eax - - dec ecx - jnz loop_y - - pop edi - pop esi - pop ebx - mov esp, ebp - pop ebp - ret - } -} - -extern "C" void __declspec(naked) asmWrap8bS (int tex, int start, int height, int mask, int line, int full, int count) -{ - _asm { - align 4 - push ebp - mov ebp, esp - push ebx - push esi - push edi - - mov edi,[start] - mov ecx,[height] -loop_y: - - xor edx,edx -loop_x: - - mov esi,[tex] - mov eax,edx - and eax,[mask] - shl eax,2 - add esi,eax - mov eax,[esi] - mov [edi],eax - add edi,4 - - inc edx - cmp edx,[count] - jne loop_x - - add edi,[line] - mov eax,[tex] - add eax,[full] - mov [tex],eax - - dec ecx - jnz loop_y - - pop edi - pop esi - pop ebx - mov esp, ebp - pop ebp - ret - } -} - -extern "C" void __declspec(naked) asmClamp8bS (int tex, int constant, int height,int line, int full, int count) -{ - _asm { - align 4 - push ebp - mov ebp, esp - push ebx - push esi - push edi - - mov esi,[constant] - mov edi,[tex] - - mov ecx,[height] -y_loop: - - mov al,[esi] - - mov edx,[count] -x_loop: - - mov [edi],al // don't unroll or make dword, it may go into next line (doesn't have to be multiple of two) - inc edi - - dec edx - jnz x_loop - - add esi,[full] - add edi,[line] - - dec ecx - jnz y_loop - - pop edi - pop esi - pop ebx - mov esp, ebp - pop ebp - ret - } -} - -//**************************************************************** -//16b textures mirror/clamp/wrap -//**************************************************************** - -extern "C" void __declspec(naked) asmMirror16bS (int tex, int start, int width, int height, int mask, int line, int full, int count) -{ - _asm { - align 4 - push ebp - mov ebp, esp - push ebx - push esi - push edi - - mov edi,[start] - mov ecx,[height] -loop_y: - - xor edx,edx -loop_x: - mov esi,[tex] - mov ebx,[width] - add ebx,edx - and ebx,[width] - jnz is_mirrored - - mov eax,edx - shl eax,1 - and eax,[mask] - add esi,eax - mov ax,[esi] - mov [edi],ax - add edi,2 - jmp end_mirror_check -is_mirrored: - add esi,[mask] - mov eax,edx - shl eax,1 - and eax,[mask] - sub esi,eax - mov ax,[esi] - mov [edi],ax - add edi,2 -end_mirror_check: - - inc edx - cmp edx,[count] - jne loop_x - - add edi,[line] - mov eax,[tex] - add eax,[full] - mov [tex],eax - - dec ecx - jnz loop_y - - pop edi - pop esi - pop ebx - mov esp, ebp - pop ebp - ret - } -} - -extern "C" void __declspec(naked) asmWrap16bS (int tex, int start, int height, int mask, int line, int full, int count) -{ - _asm { - align 4 - push ebp - mov ebp, esp - push ebx - push esi - push edi - - mov edi,[start] - mov ecx,[height] -loop_y: - - xor edx,edx -loop_x: - - mov esi,[tex] - mov eax,edx - and eax,[mask] - shl eax,2 - add esi,eax - mov eax,[esi] - mov [edi],eax - add edi,4 - - inc edx - cmp edx,[count] - jne loop_x - - add edi,[line] - mov eax,[tex] - add eax,[full] - mov [tex],eax - - dec ecx - jnz loop_y - - pop edi - pop esi - pop ebx - mov esp, ebp - pop ebp - ret - } -} - -extern "C" void __declspec(naked) asmClamp16bS (int tex, int constant, int height,int line, int full, int count) -{ - _asm { - align 4 - push ebp - mov ebp, esp - push ebx - push esi - push edi - - mov esi,[constant] - mov edi,[tex] - - mov ecx,[height] -y_loop: - - mov ax,[esi] - - mov edx,[count] -x_loop: - - mov [edi],ax // don't unroll or make dword, it may go into next line (doesn't have to be multiple of two) - add edi,2 - - dec edx - jnz x_loop - - add esi,[full] - add edi,[line] - - dec ecx - jnz y_loop - - pop edi - pop esi - pop ebx - mov esp, ebp - pop ebp - ret - } -} - -//**************************************************************** -//32b textures mirror/clamp/wrap -//**************************************************************** - -extern "C" void __declspec(naked) asmMirror32bS (int tex, int start, int width, int height, int mask, int line, int full, int count) -{ - _asm { - align 4 - push ebp - mov ebp, esp - push ebx - push esi - push edi - - mov edi,[start] - mov ecx,[height] -loop_y: - - xor edx,edx -loop_x: - mov esi,[tex] - mov ebx,[width] - add ebx,edx - and ebx,[width] - jnz is_mirrored - - mov eax,edx - shl eax,2 - and eax,[mask] - add esi,eax - mov eax,[esi] - mov [edi],eax - add edi,4 - jmp end_mirror_check -is_mirrored: - add esi,[mask] - mov eax,edx - shl eax,2 - and eax,[mask] - sub esi,eax - mov eax,[esi] - mov [edi],eax - add edi,4 -end_mirror_check: - - inc edx - cmp edx,[count] - jne loop_x - - add edi,[line] - mov eax,[tex] - add eax,[full] - mov [tex],eax - - dec ecx - jnz loop_y - - pop edi - pop esi - pop ebx - mov esp, ebp - pop ebp - ret - } -} - -extern "C" void __declspec(naked) asmWrap32bS (int tex, int start, int height, int mask, int line, int full, int count) -{ - _asm { - align 4 - push ebp - mov ebp, esp - push ebx - push esi - push edi - - mov edi,[start] - mov ecx,[height] -loop_y: - - xor edx,edx -loop_x: - - mov esi,[tex] - mov eax,edx - and eax,[mask] - shl eax,2 - add esi,eax - mov eax,[esi] - mov [edi],eax - add edi,4 - - inc edx - cmp edx,[count] - jne loop_x - - add edi,[line] - mov eax,[tex] - add eax,[full] - mov [tex],eax - - dec ecx - jnz loop_y - - pop edi - pop esi - pop ebx - mov esp, ebp - pop ebp - ret - } -} - -extern "C" void __declspec(naked) asmClamp32bS (int tex, int constant, int height,int line, int full, int count) -{ - _asm { - align 4 - push ebp - mov ebp, esp - push ebx - push esi - push edi - - mov esi,[constant] - mov edi,[tex] - - mov ecx,[height] -y_loop: - - mov eax,[esi] - - mov edx,[count] -x_loop: - - mov [edi],eax // don't unroll or make dword, it may go into next line (doesn't have to be multiple of two) - add edi,4 - - dec edx - jnz x_loop - - add esi,[full] - add edi,[line] - - dec ecx - jnz y_loop - - pop edi - pop esi - pop ebx - mov esp, ebp - pop ebp - ret - } -} - -//**************************************************************** -// -// ******** Textures conversion ******** -// -//**************************************************************** - -extern "C" void __declspec(naked) asmTexConv_ARGB1555_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int isize) -{ - _asm { - align 4 - push ebp - mov ebp, esp - push ebx - push esi - push edi - - mov esi,[src] - mov edi,[dst] - mov ecx,[isize] - -tc1_loop: - mov eax,[esi] - add esi,4 - - // arrr rrgg gggb bbbb - // aaaa rrrr gggg bbbb - mov edx,eax - and eax,0x80008000 - mov ebx,eax // ebx = 0xa000000000000000 - shr eax,1 - or ebx,eax // ebx = 0xaa00000000000000 - shr eax,1 - or ebx,eax // ebx = 0xaaa0000000000000 - shr eax,1 - or ebx,eax // ebx = 0xaaaa000000000000 - - mov eax,edx - and eax,0x78007800 // eax = 0x0rrrr00000000000 - shr eax,3 // eax = 0x0000rrrr00000000 - or ebx,eax // ebx = 0xaaaarrrr00000000 - - mov eax,edx - and eax,0x03c003c0 // eax = 0x000000gggg000000 - shr eax,2 // eax = 0x00000000gggg0000 - or ebx,eax // ebx = 0xaaaarrrrgggg0000 - - and edx,0x001e001e // edx = 0x00000000000bbbb0 - shr edx,1 // edx = 0x000000000000bbbb - or ebx,edx // ebx = 0xaaaarrrrggggbbbb - - mov [edi],ebx - add edi,4 - - dec ecx - jnz tc1_loop - - pop edi - pop esi - pop ebx - mov esp, ebp - pop ebp - ret - } -} - -extern "C" void __declspec(naked) asmTexConv_AI88_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int isize) -{ - _asm { - align 4 - push ebp - mov ebp, esp - push ebx - push esi - push edi - - mov esi,[src] - mov edi,[dst] - mov ecx,[isize] - -tc1_loop: - mov eax,[esi] - add esi,4 - - // aaaa aaaa iiii iiii - // aaaa rrrr gggg bbbb - mov edx,eax - and eax,0xF000F000 // eax = 0xaaaa000000000000 - mov ebx,eax // ebx = 0xaaaa000000000000 - - and edx,0x00F000F0 // edx = 0x00000000iiii0000 - shl edx,4 // edx = 0x0000iiii00000000 - or ebx,edx // ebx = 0xaaaaiiii00000000 - shr edx,4 // edx = 0x00000000iiii0000 - or ebx,edx // ebx = 0xaaaaiiiiiiii0000 - shr edx,4 // edx = 0x000000000000iiii - or ebx,edx // ebx = 0xaaaaiiiiiiiiiiii - - mov [edi],ebx - add edi,4 - - dec ecx - jnz tc1_loop - - pop edi - pop esi - pop ebx - mov esp, ebp - pop ebp - ret - } -} - -extern "C" void __declspec(naked) asmTexConv_AI44_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int isize) -{ - _asm { - align 4 - push ebp - mov ebp, esp - push ebx - push esi - push edi - - mov esi,[src] - mov edi,[dst] - mov ecx,[isize] - -tc1_loop: - mov eax,[esi] - add esi,4 - - // aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0 - // aaaa1 rrrr1 gggg1 bbbb1 aaaa0 rrrr0 gggg0 bbbb0 - // aaaa3 rrrr3 gggg3 bbbb3 aaaa2 rrrr2 gggg2 bbbb2 - mov edx,eax // eax = aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0 - shl eax,16 // eax = aaaa1 iiii1 aaaa0 iiii0 0000 0000 0000 0000 - and eax,0xFF000000 // eax = aaaa1 iiii1 0000 0000 0000 0000 0000 0000 - mov ebx,eax // ebx = aaaa1 iiii1 0000 0000 0000 0000 0000 0000 - and eax,0x0F000000 // eax = 0000 iiii1 0000 0000 0000 0000 0000 0000 - shr eax,4 // eax = 0000 0000 iiii1 0000 0000 0000 0000 0000 - or ebx,eax // ebx = aaaa1 iiii1 iiii1 0000 0000 0000 0000 0000 - shr eax,4 // eax = 0000 0000 0000 iiii1 0000 0000 0000 0000 - or ebx,eax // ebx = aaaa1 iiii1 iiii1 iiii1 0000 0000 0000 0000 - - mov eax,edx // eax = aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0 - shl eax,8 // eax = aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0 0000 0000 - and eax,0x0000FF00 // eax = 0000 0000 0000 0000 aaaa0 iiii0 0000 0000 - or ebx,eax // ebx = aaaa1 iiii1 iiii1 iiii1 aaaa0 iiii0 0000 0000 - and eax,0x00000F00 // eax = 0000 0000 0000 0000 0000 iiii0 0000 0000 - shr eax,4 // eax = 0000 0000 0000 0000 0000 0000 iiii0 0000 - or ebx,eax // ebx = aaaa1 iiii1 iiii1 iiii1 aaaa0 iiii0 iiii0 0000 - shr eax,4 // eax = 0000 0000 0000 0000 0000 0000 0000 iiii0 - or ebx,eax // ebx = aaaa1 iiii1 iiii1 iiii1 aaaa0 iiii0 iiii0 iiii0 - - mov [edi],ebx - add edi,4 - - mov eax,edx // eax = aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0 - and eax,0xFF000000 // eax = aaaa3 iiii3 0000 0000 0000 0000 0000 0000 - mov ebx,eax // ebx = aaaa3 iiii3 0000 0000 0000 0000 0000 0000 - and eax,0x0F000000 // eax = 0000 iiii3 0000 0000 0000 0000 0000 0000 - shr eax,4 // eax = 0000 0000 iiii3 0000 0000 0000 0000 0000 - or ebx,eax // ebx = aaaa3 iiii3 iiii3 0000 0000 0000 0000 0000 - shr eax,4 // eax = 0000 0000 0000 iiii3 0000 0000 0000 0000 - or ebx,eax // ebx = aaaa3 iiii3 iiii3 iiii3 0000 0000 0000 0000 - - // edx = aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0 - shr edx,8 // edx = 0000 0000 aaaa3 aaaa3 aaaa2 iiii2 aaaa1 iiii1 - and edx,0x0000FF00 // edx = 0000 0000 0000 0000 aaaa2 iiii2 0000 0000 - or ebx,edx // ebx = aaaa3 iiii3 iiii3 iiii3 aaaa2 iiii2 0000 0000 - and edx,0x00000F00 // edx = 0000 0000 0000 0000 0000 iiii2 0000 0000 - shr edx,4 // edx = 0000 0000 0000 0000 0000 0000 iiii2 0000 - or ebx,edx // ebx = aaaa3 iiii3 iiii3 iiii3 aaaa2 iiii2 iiii2 0000 - shr edx,4 // edx = 0000 0000 0000 0000 0000 0000 0000 iiii2 - or ebx,edx // ebx = aaaa3 iiii3 iiii3 iiii3 aaaa2 iiii2 iiii2 iiii2 - - mov [edi],ebx - add edi,4 - - dec ecx - jnz tc1_loop - - pop edi - pop esi - pop ebx - mov esp, ebp - pop ebp - ret - } -} - -extern "C" void __declspec(naked) asmTexConv_A8_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int isize) -{ - _asm { - align 4 - push ebp - mov ebp, esp - push ebx - push esi - push edi - - mov esi,[src] - mov edi,[dst] - mov ecx,[isize] - -tc1_loop: - mov eax,[esi] - add esi,4 - - // aaaa3 aaaa3 aaaa2 aaaa2 aaaa1 aaaa1 aaaa0 aaaa0 - // aaaa1 rrrr1 gggg1 bbbb1 aaaa0 rrrr0 gggg0 bbbb0 - // aaaa3 rrrr3 gggg3 bbbb3 aaaa2 rrrr2 gggg2 bbbb2 - mov edx,eax - and eax,0x0000F000 // eax = 00 00 00 00 a1 00 00 00 - shl eax,16 // eax = a1 00 00 00 00 00 00 00 - mov ebx,eax // ebx = a1 00 00 00 00 00 00 00 - shr eax,4 - or ebx,eax // ebx = a1 a1 00 00 00 00 00 00 - shr eax,4 - or ebx,eax // ebx = a1 a1 a1 00 00 00 00 00 - shr eax,4 - or ebx,eax // ebx = a1 a1 a1 a1 00 00 00 00 - - mov eax,edx - and eax,0x000000F0 // eax = 00 00 00 00 00 00 a0 00 - shl eax,8 // eax = 00 00 00 00 a0 00 00 00 - or ebx,eax - shr eax,4 - or ebx,eax - shr eax,4 - or ebx,eax - shr eax,4 - or ebx,eax // ebx = a1 a1 a1 a1 a0 a0 a0 a0 - - mov [edi],ebx - add edi,4 - - mov eax,edx // eax = a3 a3 a2 a2 a1 a1 a0 a0 - and eax,0xF0000000 // eax = a3 00 00 00 00 00 00 00 - mov ebx,eax // ebx = a3 00 00 00 00 00 00 00 - shr eax,4 - or ebx,eax // ebx = a3 a3 00 00 00 00 00 00 - shr eax,4 - or ebx,eax // ebx = a3 a3 a3 00 00 00 00 00 - shr eax,4 - or ebx,eax // ebx = a3 a3 a3 a3 00 00 00 00 - - and edx,0x00F00000 // eax = 00 00 a2 00 00 00 00 00 - shr edx,8 // eax = 00 00 00 00 a2 00 00 00 - or ebx,edx - shr edx,4 - or ebx,edx - shr edx,4 - or ebx,edx - shr edx,4 - or ebx,edx // ebx = a3 a3 a3 a3 a2 a2 a2 a2 - - mov [edi],ebx - add edi,4 - - dec ecx - jnz tc1_loop - - pop edi - pop esi - pop ebx - mov esp, ebp - pop ebp - ret - } -} - -//**************************************************************** -// -// ******** Tmem functions ******** -// -//**************************************************************** - -//**************************************************************** -// CopyBlock - copies a block from base_addr+offset to dest_addr, while unswapping the -// data within. -// -// edi = dest_addr -> end of dest -// ecx = num_words -// esi = base_addr (preserved) -// edx = offset (preserved) -//**************************************************************** -void __declspec(naked) CopyBlock ( void ) -{ - _asm { - align 4 - push ebp - mov ebp, esp - push eax - push ebx - push esi - push edx - - or ecx,ecx - jz near copyblock_end - - push ecx - - // first, set the source address and check if not on a dword boundary - push esi - push edx - mov ebx,edx - and edx,0FFFFFFFCh - add esi,edx - - and ebx,3 // ebx = # we DON'T need to copy - jz copyblock_copy - - mov edx,4 // ecx = # we DO need to copy - sub edx,ebx - - // load the first word, accounting for swapping - - mov eax,[esi] - add esi,4 -copyblock_precopy_skip: - rol eax,8 - dec ebx - jnz copyblock_precopy_skip - -copyblock_precopy_copy: - rol eax,8 - mov [edi],al - inc edi - dec edx - jnz copyblock_precopy_copy - - mov eax,[esi] - add esi,4 - bswap eax - mov [edi],eax - add edi,4 - - dec ecx // 1 less word to copy - jz copyblock_postcopy - -copyblock_copy: - mov eax,[esi] - bswap eax - mov [edi],eax - - mov eax,[esi+4] - bswap eax - mov [edi+4],eax - - add esi,8 - add edi,8 - - dec ecx - jnz copyblock_copy - -copyblock_postcopy: - pop edx - pop esi - pop ecx - - // check again if on dword boundary - mov ebx,edx // ebx = # we DO need to copy - - and ebx,3 - jz copyblock_end - - shl ecx,3 // ecx = num_words * 8 - add edx,ecx - and edx,0FFFFFFFCh - add esi,edx - - mov eax,[esi] - -copyblock_postcopy_copy: - rol eax,8 - mov [edi],al - inc edi - dec ebx - jnz copyblock_postcopy_copy - -copyblock_end: - pop edx - pop esi - pop ebx - pop eax - mov esp, ebp - pop ebp - ret - } -} - -extern "C" __declspec(naked) void SwapBlock32 ( void ) -{ -//**************************************************************** -// SwapBlock - swaps every other 32-bit word at addr -// -// ecx = num_words -> 0 -// edi = addr -> end of dest -//**************************************************************** - _asm { - align 4 - push ebp - mov ebp, esp - push eax - push ebx - or ecx,ecx - jz swapblock32_end -swapblock32_loop: - mov eax,[edi] - mov ebx,[edi+4] - mov [edi],ebx - mov [edi+4],eax - add edi,8 - dec ecx - jnz swapblock32_loop -swapblock32_end: - pop ebx - pop eax - mov esp, ebp - pop ebp - ret - } -} - -//**************************************************************** -// -// ******** Load block/tile ******** -// -//**************************************************************** - -extern "C" __declspec(naked) void asmLoadBlock(int src, int dst, int off, int dxt, int cnt, wxUIntPtr swp) -{ - _asm { - align 4 - push ebp - mov ebp, esp - - push ebx - push esi - push edi - - // copy the data - mov esi,[src] - mov edi,[dst] - mov ecx,[cnt] - mov edx,[off] - call CopyBlock - - // now swap it - mov eax,[cnt] // eax = count remaining - xor edx,edx // edx = dxt counter - mov edi,[dst] - mov ebx,[dxt] - - xor ecx,ecx // ecx = how much to copy -dxt_test: - add edi,8 - dec eax - jz end_dxt_test - add edx,ebx - jns dxt_test - -dxt_s_test: - inc ecx - dec eax - jz end_dxt_test - add edx,ebx - js dxt_s_test - - // swap this data (ecx set, dst set) - call [swp] // (ecx reset to 0 after) - - jmp dxt_test // and repeat - -end_dxt_test: - // swap any remaining data - call [swp] - - pop edi - pop esi - pop ebx - mov esp, ebp - pop ebp - ret - } -} - -extern "C" __declspec(naked) void asmLoadTile(int src, int dst, int width, int height, int line, int off, int end) -{ - _asm { - align 4 - push ebp - mov ebp, esp - - push ebx - push esi - push edi - - // set initial values - mov edi,[dst] - mov ecx,[width] - mov esi,[src] - mov edx,[off] - xor ebx,ebx // swap this line? - mov eax,[height] - -loadtile_loop: - cmp [end],edi // end of tmem: error - jc loadtile_end - - // copy this line - push edi - push ecx - call CopyBlock - pop ecx - - // swap it? - xor ebx,1 - jnz loadtile_no_swap - - // (ecx set, restore edi) - pop edi - push ecx - call SwapBlock32 - pop ecx - jmp loadtile_swap_end -loadtile_no_swap: - add sp,4 // forget edi, we are already at the next position -loadtile_swap_end: - - add edx,[line] - - dec eax - jnz loadtile_loop - -loadtile_end: - - pop edi - pop esi - pop ebx - mov esp, ebp - pop ebp - ret - } -} - - -//**************************************************************** -// -// ******** Texture CRC ******** -// -//**************************************************************** -extern "C" __declspec(naked) int asmTextureCRC(int addr, int width, int height, int line) -{ - _asm { - align 4 - push ebp - mov ebp, esp - - push ebx - push edi - - xor eax,eax // eax is final result - mov ebx,[line] - mov ecx,[height] // ecx is height counter - mov edi,[addr] // edi is ptr to texture memory -crc_loop_y: - push ecx - - mov ecx,[width] -crc_loop_x: - - add eax,[edi] // MUST be 64-bit aligned, so manually unroll - add eax,[edi+4] - mov edx,ecx - mul edx - add eax,edx - add edi,8 - - dec ecx - jnz crc_loop_x - - pop ecx - - mov edx,ecx - mul edx - add eax,edx - - add edi,ebx - - dec ecx - jnz crc_loop_y - - pop edi - pop ebx - mov esp, ebp - pop ebp - ret - } -} diff --git a/Source/Glide64/rdp.cpp b/Source/Glide64/rdp.cpp index e7397e08b..52213cf07 100644 --- a/Source/Glide64/rdp.cpp +++ b/Source/Glide64/rdp.cpp @@ -48,8 +48,38 @@ #include "FBtoScreen.h" #include "CRC.h" -extern "C" void SwapBlock32 (); -extern "C" void SwapBlock64 (); +extern "C" __declspec(naked) void SwapBlock32 ( void ) +{ +//**************************************************************** +// SwapBlock - swaps every other 32-bit word at addr +// +// ecx = num_words -> 0 +// edi = addr -> end of dest +//**************************************************************** + _asm { + align 4 + push ebp + mov ebp, esp + push eax + push ebx + or ecx,ecx + jz swapblock32_end +swapblock32_loop: + mov eax,[edi] + mov ebx,[edi+4] + mov [edi],ebx + mov [edi+4],eax + add edi,8 + dec ecx + jnz swapblock32_loop +swapblock32_end: + pop ebx + pop eax + mov esp, ebp + pop ebp + ret + } +} const int NumOfFormats = 3; SCREEN_SHOT_FORMAT ScreenShotFormats[NumOfFormats] = { {wxT("BMP"), wxT("bmp"), wxBITMAP_TYPE_BMP}, {wxT("PNG"), wxT("png"), wxBITMAP_TYPE_PNG}, {wxT("JPEG"), wxT("jpeg"), wxBITMAP_TYPE_JPEG} }; @@ -1824,7 +1854,168 @@ void setTBufTex(wxUint16 t_mem, wxUint32 cnt) } } -extern "C" void asmLoadBlock(int src, int dst, int off, int dxt, int cnt, int swp); +void __declspec(naked) CopyBlock ( void ) +{ + _asm { + align 4 + push ebp + mov ebp, esp + push eax + push ebx + push esi + push edx + + or ecx,ecx + jz near copyblock_end + + push ecx + + // first, set the source address and check if not on a dword boundary + push esi + push edx + mov ebx,edx + and edx,0FFFFFFFCh + add esi,edx + + and ebx,3 // ebx = # we DON'T need to copy + jz copyblock_copy + + mov edx,4 // ecx = # we DO need to copy + sub edx,ebx + + // load the first word, accounting for swapping + + mov eax,[esi] + add esi,4 +copyblock_precopy_skip: + rol eax,8 + dec ebx + jnz copyblock_precopy_skip + +copyblock_precopy_copy: + rol eax,8 + mov [edi],al + inc edi + dec edx + jnz copyblock_precopy_copy + + mov eax,[esi] + add esi,4 + bswap eax + mov [edi],eax + add edi,4 + + dec ecx // 1 less word to copy + jz copyblock_postcopy + +copyblock_copy: + mov eax,[esi] + bswap eax + mov [edi],eax + + mov eax,[esi+4] + bswap eax + mov [edi+4],eax + + add esi,8 + add edi,8 + + dec ecx + jnz copyblock_copy + +copyblock_postcopy: + pop edx + pop esi + pop ecx + + // check again if on dword boundary + mov ebx,edx // ebx = # we DO need to copy + + and ebx,3 + jz copyblock_end + + shl ecx,3 // ecx = num_words * 8 + add edx,ecx + and edx,0FFFFFFFCh + add esi,edx + + mov eax,[esi] + +copyblock_postcopy_copy: + rol eax,8 + mov [edi],al + inc edi + dec ebx + jnz copyblock_postcopy_copy + +copyblock_end: + pop edx + pop esi + pop ebx + pop eax + mov esp, ebp + pop ebp + ret + } +} + +extern "C" __declspec(naked) void asmLoadBlock(int src, int dst, int off, int dxt, int cnt, wxUIntPtr swp) +{ + _asm { + align 4 + push ebp + mov ebp, esp + + push ebx + push esi + push edi + + // copy the data + mov esi,[src] + mov edi,[dst] + mov ecx,[cnt] + mov edx,[off] + call CopyBlock + + // now swap it + mov eax,[cnt] // eax = count remaining + xor edx,edx // edx = dxt counter + mov edi,[dst] + mov ebx,[dxt] + + xor ecx,ecx // ecx = how much to copy +dxt_test: + add edi,8 + dec eax + jz end_dxt_test + add edx,ebx + jns dxt_test + +dxt_s_test: + inc ecx + dec eax + jz end_dxt_test + add edx,ebx + js dxt_s_test + + // swap this data (ecx set, dst set) + call [swp] // (ecx reset to 0 after) + + jmp dxt_test // and repeat + +end_dxt_test: + // swap any remaining data + call [swp] + + pop edi + pop esi + pop ebx + mov esp, ebp + pop ebp + ret + } +} + void LoadBlock32b(wxUint32 tile, wxUint32 ul_s, wxUint32 ul_t, wxUint32 lr_s, wxUint32 dxt); static void rdp_loadblock() { @@ -1916,7 +2107,65 @@ static void rdp_loadblock() setTBufTex(rdp.tiles[tile].t_mem, cnt); } -extern "C" void asmLoadTile(int src, int dst, int width, int height, int line, int off, int end); +extern "C" __declspec(naked) void asmLoadTile(int src, int dst, int width, int height, int line, int off, int end) +{ + _asm { + align 4 + push ebp + mov ebp, esp + + push ebx + push esi + push edi + + // set initial values + mov edi,[dst] + mov ecx,[width] + mov esi,[src] + mov edx,[off] + xor ebx,ebx // swap this line? + mov eax,[height] + +loadtile_loop: + cmp [end],edi // end of tmem: error + jc loadtile_end + + // copy this line + push edi + push ecx + call CopyBlock + pop ecx + + // swap it? + xor ebx,1 + jnz loadtile_no_swap + + // (ecx set, restore edi) + pop edi + push ecx + call SwapBlock32 + pop ecx + jmp loadtile_swap_end +loadtile_no_swap: + add sp,4 // forget edi, we are already at the next position +loadtile_swap_end: + + add edx,[line] + + dec eax + jnz loadtile_loop + +loadtile_end: + + pop edi + pop esi + pop ebx + mov esp, ebp + pop ebp + ret + } +} + void LoadTile32b (wxUint32 tile, wxUint32 ul_s, wxUint32 ul_t, wxUint32 width, wxUint32 height); static void rdp_loadtile() {