project64/Source/Glide64/TexLoad4b.h

2003 lines
42 KiB
C

/*
* Glide64 - Glide video plugin for Nintendo 64 emulators.
* Copyright (c) 2002 Dave2001
* Copyright (c) 2003-2009 Sergey 'Gonetz' Lipski
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
//****************************************************************
//
// Glide64 - Glide Plugin for Nintendo 64 emulators
// Project started on December 29th, 2001
//
// Authors:
// Dave2001, original author, founded the project in 2001, left it in 2002
// Gugaman, joined the project in 2002, left it in 2002
// Sergey 'Gonetz' Lipski, joined the project in 2002, main author since fall of 2002
// Hiroshi 'KoolSmoky' Morii, joined the project in 2007
//
//****************************************************************
//
// To modify Glide64:
// * Write your name and (optionally) your email in a comment next to your work, so I know who did it, and so that you can find which parts you modified when it comes time to send them to me.
// * Do NOT send me the whole project or file that you modified. Take out your modified code sections, and tell me where to put them. If people sent the whole thing, I would have many different versions, but no idea how to combine them all.
//
//****************************************************************
extern "C" void __declspec(naked) asmLoad4bCI (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext, wxUIntPtr pal)
{
_asm {
push ebp
mov ebp, esp
push ebx
push esi
push edi
mov ebx,[pal]
mov esi,[src]
mov edi,[dst]
mov ecx,[height]
y_loop:
push ecx
mov ecx,[wid_64]
x_loop:
push ecx
mov eax,[esi] // read all 8 pixels
bswap eax
add esi,4
mov edx,eax
// 1st dword output {
shr eax,23
and eax,0x1E
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
mov eax,edx
shr eax,27
and eax,0x1E
mov cx,[ebx+eax]
ror cx,1
mov [edi],ecx
add edi,4
// }
// 2nd dword output {
mov eax,edx
shr eax,15
and eax,0x1E
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
mov eax,edx
shr eax,19
and eax,0x1E
mov cx,[ebx+eax]
ror cx,1
mov [edi],ecx
add edi,4
// }
// 3rd dword output {
mov eax,edx
shr eax,7
and eax,0x1E
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
mov eax,edx
shr eax,11
and eax,0x1E
mov cx,[ebx+eax]
ror cx,1
mov [edi],ecx
add edi,4
// }
// 4th dword output {
mov eax,edx
shl eax,1
and eax,0x1E
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
shr edx,3
and edx,0x1E
mov cx,[ebx+edx]
ror cx,1
mov [edi],ecx
add edi,4
// }
// * copy
mov eax,[esi] // read all 8 pixels
bswap eax
add esi,4
mov edx,eax
// 1st dword output {
shr eax,23
and eax,0x1E
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
mov eax,edx
shr eax,27
and eax,0x1E
mov cx,[ebx+eax]
ror cx,1
mov [edi],ecx
add edi,4
// }
// 2nd dword output {
mov eax,edx
shr eax,15
and eax,0x1E
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
mov eax,edx
shr eax,19
and eax,0x1E
mov cx,[ebx+eax]
ror cx,1
mov [edi],ecx
add edi,4
// }
// 3rd dword output {
mov eax,edx
shr eax,7
and eax,0x1E
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
mov eax,edx
shr eax,11
and eax,0x1E
mov cx,[ebx+eax]
ror cx,1
mov [edi],ecx
add edi,4
// }
// 4th dword output {
mov eax,edx
shl eax,1
and eax,0x1E
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
shr edx,3
and edx,0x1E
mov cx,[ebx+edx]
ror cx,1
mov [edi],ecx
add edi,4
// }
// *
pop ecx
dec ecx
jnz x_loop
pop ecx
dec ecx
jz near end_y_loop
push ecx
mov eax,esi
add eax,[line]
mov esi,[src]
sub eax,esi
and eax,0x7FF
add esi,eax
add edi,[ext]
mov ecx,[wid_64]
x_loop_2:
push ecx
mov eax,[esi+4] // read all 8 pixels
bswap eax
mov edx,eax
// 1st dword output {
shr eax,23
and eax,0x1E
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
mov eax,edx
shr eax,27
and eax,0x1E
mov cx,[ebx+eax]
ror cx,1
mov [edi],ecx
add edi,4
// }
// 2nd dword output {
mov eax,edx
shr eax,15
and eax,0x1E
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
mov eax,edx
shr eax,19
and eax,0x1E
mov cx,[ebx+eax]
ror cx,1
mov [edi],ecx
add edi,4
// }
// 3rd dword output {
mov eax,edx
shr eax,7
and eax,0x1E
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
mov eax,edx
shr eax,11
and eax,0x1E
mov cx,[ebx+eax]
ror cx,1
mov [edi],ecx
add edi,4
// }
// 4th dword output {
mov eax,edx
shl eax,1
and eax,0x1E
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
shr edx,3
and edx,0x1E
mov cx,[ebx+edx]
ror cx,1
mov [edi],ecx
add edi,4
// }
// * copy
mov eax,[esi] // read all 8 pixels
bswap eax
mov edx,esi
add edx,8
mov esi,[src]
sub edx,esi
and edx,0x7FF
add esi,edx
mov edx,eax
// 1st dword output {
shr eax,23
and eax,0x1E
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
mov eax,edx
shr eax,27
and eax,0x1E
mov cx,[ebx+eax]
ror cx,1
mov [edi],ecx
add edi,4
// }
// 2nd dword output {
mov eax,edx
shr eax,15
and eax,0x1E
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
mov eax,edx
shr eax,19
and eax,0x1E
mov cx,[ebx+eax]
ror cx,1
mov [edi],ecx
add edi,4
// }
// 3rd dword output {
mov eax,edx
shr eax,7
and eax,0x1E
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
mov eax,edx
shr eax,11
and eax,0x1E
mov cx,[ebx+eax]
ror cx,1
mov [edi],ecx
add edi,4
// }
// 4th dword output {
mov eax,edx
shl eax,1
and eax,0x1E
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
shr edx,3
and edx,0x1E
mov cx,[ebx+edx]
ror cx,1
mov [edi],ecx
add edi,4
// }
// *
pop ecx
dec ecx
jnz x_loop_2
mov eax,esi
add eax,[line]
mov esi,[src]
sub eax,esi
and eax,0x7FF
add esi,eax
add edi,[ext]
pop ecx
dec ecx
jnz y_loop
end_y_loop:
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret
}
}
extern "C" void __declspec(naked) asmLoad4bIAPal (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext, wxUIntPtr pal)
{
_asm {
push ebp
mov ebp, esp
push ebx
push esi
push edi
mov ebx,[pal]
mov esi,[src]
mov edi,[dst]
mov ecx,[height]
y_loop:
push ecx
mov ecx,[wid_64]
x_loop:
push ecx
mov eax,[esi] // read all 8 pixels
bswap eax
add esi,4
mov edx,eax
// 1st dword output {
shr eax,23
and eax,0x1E
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
mov eax,edx
shr eax,27
and eax,0x1E
mov cx,[ebx+eax]
ror cx,8
mov [edi],ecx
add edi,4
// }
// 2nd dword output {
mov eax,edx
shr eax,15
and eax,0x1E
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
mov eax,edx
shr eax,19
and eax,0x1E
mov cx,[ebx+eax]
ror cx,8
mov [edi],ecx
add edi,4
// }
// 3rd dword output {
mov eax,edx
shr eax,7
and eax,0x1E
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
mov eax,edx
shr eax,11
and eax,0x1E
mov cx,[ebx+eax]
ror cx,8
mov [edi],ecx
add edi,4
// }
// 4th dword output {
mov eax,edx
shl eax,1
and eax,0x1E
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
shr edx,3
and edx,0x1E
mov cx,[ebx+edx]
ror cx,8
mov [edi],ecx
add edi,4
// }
// * copy
mov eax,[esi] // read all 8 pixels
bswap eax
add esi,4
mov edx,eax
// 1st dword output {
shr eax,23
and eax,0x1E
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
mov eax,edx
shr eax,27
and eax,0x1E
mov cx,[ebx+eax]
ror cx,8
mov [edi],ecx
add edi,4
// }
// 2nd dword output {
mov eax,edx
shr eax,15
and eax,0x1E
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
mov eax,edx
shr eax,19
and eax,0x1E
mov cx,[ebx+eax]
ror cx,8
mov [edi],ecx
add edi,4
// }
// 3rd dword output {
mov eax,edx
shr eax,7
and eax,0x1E
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
mov eax,edx
shr eax,11
and eax,0x1E
mov cx,[ebx+eax]
ror cx,8
mov [edi],ecx
add edi,4
// }
// 4th dword output {
mov eax,edx
shl eax,1
and eax,0x1E
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
shr edx,3
and edx,0x1E
mov cx,[ebx+edx]
ror cx,8
mov [edi],ecx
add edi,4
// }
// *
pop ecx
dec ecx
jnz x_loop
pop ecx
dec ecx
jz near end_y_loop
push ecx
mov eax,esi
add eax,[line]
mov esi,[src]
sub eax,esi
and eax,0x7FF
add esi,eax
add edi,[ext]
mov ecx,[wid_64]
x_loop_2:
push ecx
mov eax,[esi+4] // read all 8 pixels
bswap eax
mov edx,eax
// 1st dword output {
shr eax,23
and eax,0x1E
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
mov eax,edx
shr eax,27
and eax,0x1E
mov cx,[ebx+eax]
ror cx,8
mov [edi],ecx
add edi,4
// }
// 2nd dword output {
mov eax,edx
shr eax,15
and eax,0x1E
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
mov eax,edx
shr eax,19
and eax,0x1E
mov cx,[ebx+eax]
ror cx,8
mov [edi],ecx
add edi,4
// }
// 3rd dword output {
mov eax,edx
shr eax,7
and eax,0x1E
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
mov eax,edx
shr eax,11
and eax,0x1E
mov cx,[ebx+eax]
ror cx,8
mov [edi],ecx
add edi,4
// }
// 4th dword output {
mov eax,edx
shl eax,1
and eax,0x1E
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
shr edx,3
and edx,0x1E
mov cx,[ebx+edx]
ror cx,8
mov [edi],ecx
add edi,4
// }
// * copy
mov eax,[esi] // read all 8 pixels
bswap eax
mov edx,esi
add edx,8
mov esi,[src]
sub edx,esi
and edx,0x7FF
add esi,edx
mov edx,eax
// 1st dword output {
shr eax,23
and eax,0x1E
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
mov eax,edx
shr eax,27
and eax,0x1E
mov cx,[ebx+eax]
ror cx,8
mov [edi],ecx
add edi,4
// }
// 2nd dword output {
mov eax,edx
shr eax,15
and eax,0x1E
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
mov eax,edx
shr eax,19
and eax,0x1E
mov cx,[ebx+eax]
ror cx,8
mov [edi],ecx
add edi,4
// }
// 3rd dword output {
mov eax,edx
shr eax,7
and eax,0x1E
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
mov eax,edx
shr eax,11
and eax,0x1E
mov cx,[ebx+eax]
ror cx,8
mov [edi],ecx
add edi,4
// }
// 4th dword output {
mov eax,edx
shl eax,1
and eax,0x1E
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
shr edx,3
and edx,0x1E
mov cx,[ebx+edx]
ror cx,8
mov [edi],ecx
add edi,4
// }
// *
pop ecx
dec ecx
jnz x_loop_2
mov eax,esi
add eax,[line]
mov esi,[src]
sub eax,esi
and eax,0x7FF
add esi,eax
add edi,[ext]
pop ecx
dec ecx
jnz y_loop
end_y_loop:
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret
}
}
extern "C" void __declspec(naked) asmLoad4bIA (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext)
{
_asm {
push ebp
mov ebp, esp
push ebx
push esi
push edi
mov esi,[src]
mov edi,[dst]
mov ecx,[height]
y_loop:
push ecx
mov ecx,[wid_64]
x_loop:
push ecx
mov eax,[esi] // read all 8 pixels
bswap eax
add esi,4
mov edx,eax
// 1st dword {
xor ecx,ecx
// pixel #1
// IIIAxxxxxxxxxxxxxxxxxxxxxxxxxxxx
// xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
mov eax,edx
shr eax,24 //Alpha
and eax,0x00000010
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
mov eax,edx
shr eax,28 // Intensity
and eax,0x0000000E
or ecx,eax
shr eax,3
or ecx,eax
// pixel #2
// xxxxIIIAxxxxxxxxxxxxxxxxxxxxxxxx
// xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
mov eax,edx
shr eax,12 //Alpha
and eax,0x00001000
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
mov eax,edx
shr eax,16 // Intensity
and eax,0x00000E00
or ecx,eax
shr eax,3
and eax,0x00000100
or ecx,eax
// pixel #3
// xxxxxxxxIIIAxxxxxxxxxxxxxxxxxxxx
// xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
//Alpha
mov eax,edx
and eax,0x00100000
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
mov eax,edx
shr eax,4 // Intensity
and eax,0x000E0000
or ecx,eax
shr eax,3
and eax,0x00010000
or ecx,eax
// pixel #4
// xxxxxxxxxxxxIIIAxxxxxxxxxxxxxxxx
// AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
mov eax,edx
shl eax,12 //Alpha
and eax,0x10000000
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
mov eax,edx
shl eax,8 // Intensity
and eax,0x0E000000
or ecx,eax
shr eax,3
and eax,0x01000000
or ecx,eax
mov [edi],ecx
add edi,4
// }
// 2nd dword {
xor ecx,ecx
// pixel #5
// xxxxxxxxxxxxxxxxIIIAxxxxxxxxxxxx
// xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
mov eax,edx
shr eax,8 //Alpha
and eax,0x00000010
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
mov eax,edx
shr eax,12 // Intensity
and eax,0x0000000E
or ecx,eax
shr eax,3
or ecx,eax
// pixel #6
// xxxxxxxxxxxxxxxxxxxxIIIAxxxxxxxx
// xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
//Alpha
mov eax,edx
shl eax,4
and eax,0x00001000
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
mov eax,edx // Intensity
and eax,0x00000E00
or ecx,eax
shr eax,3
and eax,0x00000100
or ecx,eax
// pixel #7
// xxxxxxxxxxxxxxxxxxxxxxxxIIIAxxxx
// xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
//Alpha
mov eax,edx
shl eax,16
and eax,0x00100000
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
mov eax,edx
shl eax,12 // Intensity
and eax,0x000E0000
or ecx,eax
shr eax,3
and eax,0x00010000
or ecx,eax
// pixel #8
// xxxxxxxxxxxxxxxxxxxxxxxxxxxxIIIA
// AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
mov eax,edx
shl eax,28 //Alpha
and eax,0x10000000
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
mov eax,edx
shl eax,24 // Intensity
and eax,0x0E000000
or ecx,eax
shr eax,3
and eax,0x01000000
or ecx,eax
mov [edi],ecx
add edi,4
// }
// * copy
mov eax,[esi] // read all 8 pixels
bswap eax
add esi,4
mov edx,eax
// 1st dword {
xor ecx,ecx
// pixel #1
// IIIAxxxxxxxxxxxxxxxxxxxxxxxxxxxx
// xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
mov eax,edx
shr eax,24 //Alpha
and eax,0x00000010
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
mov eax,edx
shr eax,28 // Intensity
and eax,0x0000000E
or ecx,eax
shr eax,3
or ecx,eax
// pixel #2
// xxxxIIIAxxxxxxxxxxxxxxxxxxxxxxxx
// xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
mov eax,edx
shr eax,12 //Alpha
and eax,0x00001000
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
mov eax,edx
shr eax,16 // Intensity
and eax,0x00000E00
or ecx,eax
shr eax,3
and eax,0x00000100
or ecx,eax
// pixel #3
// xxxxxxxxIIIAxxxxxxxxxxxxxxxxxxxx
// xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
//Alpha
mov eax,edx
and eax,0x00100000
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
mov eax,edx
shr eax,4 // Intensity
and eax,0x000E0000
or ecx,eax
shr eax,3
and eax,0x00010000
or ecx,eax
// pixel #4
// xxxxxxxxxxxxIIIAxxxxxxxxxxxxxxxx
// AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
mov eax,edx
shl eax,12 //Alpha
and eax,0x10000000
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
mov eax,edx
shl eax,8 // Intensity
and eax,0x0E000000
or ecx,eax
shr eax,3
and eax,0x01000000
or ecx,eax
mov [edi],ecx
add edi,4
// }
// 2nd dword {
xor ecx,ecx
// pixel #5
// xxxxxxxxxxxxxxxxIIIAxxxxxxxxxxxx
// xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
mov eax,edx
shr eax,8 //Alpha
and eax,0x00000010
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
mov eax,edx
shr eax,12 // Intensity
and eax,0x0000000E
or ecx,eax
shr eax,3
or ecx,eax
// pixel #6
// xxxxxxxxxxxxxxxxxxxxIIIAxxxxxxxx
// xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
//Alpha
mov eax,edx
shl eax,4
and eax,0x00001000
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
mov eax,edx // Intensity
and eax,0x00000E00
or ecx,eax
shr eax,3
and eax,0x00000100
or ecx,eax
// pixel #7
// xxxxxxxxxxxxxxxxxxxxxxxxIIIAxxxx
// xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
//Alpha
mov eax,edx
shl eax,16
and eax,0x00100000
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
mov eax,edx
shl eax,12 // Intensity
and eax,0x000E0000
or ecx,eax
shr eax,3
and eax,0x00010000
or ecx,eax
// pixel #8
// xxxxxxxxxxxxxxxxxxxxxxxxxxxxIIIA
// AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
mov eax,edx
shl eax,28 //Alpha
and eax,0x10000000
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
mov eax,edx
shl eax,24 // Intensity
and eax,0x0E000000
or ecx,eax
shr eax,3
and eax,0x01000000
or ecx,eax
mov [edi],ecx
add edi,4
// }
// *
pop ecx
dec ecx
jnz x_loop
pop ecx
dec ecx
jz near end_y_loop
push ecx
add esi,[line]
add edi,[ext]
mov ecx,[wid_64]
x_loop_2:
push ecx
mov eax,[esi+4] // read all 8 pixels
bswap eax
mov edx,eax
// 1st dword {
xor ecx,ecx
// pixel #1
// IIIAxxxxxxxxxxxxxxxxxxxxxxxxxxxx
// xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
mov eax,edx
shr eax,24 //Alpha
and eax,0x00000010
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
mov eax,edx
shr eax,28 // Intensity
and eax,0x0000000E
or ecx,eax
shr eax,3
or ecx,eax
// pixel #2
// xxxxIIIAxxxxxxxxxxxxxxxxxxxxxxxx
// xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
mov eax,edx
shr eax,12 //Alpha
and eax,0x00001000
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
mov eax,edx
shr eax,16 // Intensity
and eax,0x00000E00
or ecx,eax
shr eax,3
and eax,0x00000100
or ecx,eax
// pixel #3
// xxxxxxxxIIIAxxxxxxxxxxxxxxxxxxxx
// xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
//Alpha
mov eax,edx
and eax,0x00100000
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
mov eax,edx
shr eax,4 // Intensity
and eax,0x000E0000
or ecx,eax
shr eax,3
and eax,0x00010000
or ecx,eax
// pixel #4
// xxxxxxxxxxxxIIIAxxxxxxxxxxxxxxxx
// AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
mov eax,edx
shl eax,12 //Alpha
and eax,0x10000000
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
mov eax,edx
shl eax,8 // Intensity
and eax,0x0E000000
or ecx,eax
shr eax,3
and eax,0x01000000
or ecx,eax
mov [edi],ecx
add edi,4
// }
// 2nd dword {
xor ecx,ecx
// pixel #5
// xxxxxxxxxxxxxxxxIIIAxxxxxxxxxxxx
// xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
mov eax,edx
shr eax,8 //Alpha
and eax,0x00000010
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
mov eax,edx
shr eax,12 // Intensity
and eax,0x0000000E
or ecx,eax
shr eax,3
or ecx,eax
// pixel #6
// xxxxxxxxxxxxxxxxxxxxIIIAxxxxxxxx
// xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
//Alpha
mov eax,edx
shl eax,4
and eax,0x00001000
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
mov eax,edx // Intensity
and eax,0x00000E00
or ecx,eax
shr eax,3
and eax,0x00000100
or ecx,eax
// pixel #7
// xxxxxxxxxxxxxxxxxxxxxxxxIIIAxxxx
// xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
//Alpha
mov eax,edx
shl eax,16
and eax,0x00100000
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
mov eax,edx
shl eax,12 // Intensity
and eax,0x000E0000
or ecx,eax
shr eax,3
and eax,0x00010000
or ecx,eax
// pixel #8
// xxxxxxxxxxxxxxxxxxxxxxxxxxxxIIIA
// AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
mov eax,edx
shl eax,28 //Alpha
and eax,0x10000000
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
mov eax,edx
shl eax,24 // Intensity
and eax,0x0E000000
or ecx,eax
shr eax,3
and eax,0x01000000
or ecx,eax
mov [edi],ecx
add edi,4
// }
// * copy
mov eax,[esi] // read all 8 pixels
bswap eax
add esi,8
mov edx,eax
// 1st dword {
xor ecx,ecx
// pixel #1
// IIIAxxxxxxxxxxxxxxxxxxxxxxxxxxxx
// xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
mov eax,edx
shr eax,24 //Alpha
and eax,0x00000010
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
mov eax,edx
shr eax,28 // Intensity
and eax,0x0000000E
or ecx,eax
shr eax,3
or ecx,eax
// pixel #2
// xxxxIIIAxxxxxxxxxxxxxxxxxxxxxxxx
// xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
mov eax,edx
shr eax,12 //Alpha
and eax,0x00001000
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
mov eax,edx
shr eax,16 // Intensity
and eax,0x00000E00
or ecx,eax
shr eax,3
and eax,0x00000100
or ecx,eax
// pixel #3
// xxxxxxxxIIIAxxxxxxxxxxxxxxxxxxxx
// xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
//Alpha
mov eax,edx
and eax,0x00100000
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
mov eax,edx
shr eax,4 // Intensity
and eax,0x000E0000
or ecx,eax
shr eax,3
and eax,0x00010000
or ecx,eax
// pixel #4
// xxxxxxxxxxxxIIIAxxxxxxxxxxxxxxxx
// AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
mov eax,edx
shl eax,12 //Alpha
and eax,0x10000000
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
mov eax,edx
shl eax,8 // Intensity
and eax,0x0E000000
or ecx,eax
shr eax,3
and eax,0x01000000
or ecx,eax
mov [edi],ecx
add edi,4
// }
// 2nd dword {
xor ecx,ecx
// pixel #5
// xxxxxxxxxxxxxxxxIIIAxxxxxxxxxxxx
// xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII
mov eax,edx
shr eax,8 //Alpha
and eax,0x00000010
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
mov eax,edx
shr eax,12 // Intensity
and eax,0x0000000E
or ecx,eax
shr eax,3
or ecx,eax
// pixel #6
// xxxxxxxxxxxxxxxxxxxxIIIAxxxxxxxx
// xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx
//Alpha
mov eax,edx
shl eax,4
and eax,0x00001000
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
mov eax,edx // Intensity
and eax,0x00000E00
or ecx,eax
shr eax,3
and eax,0x00000100
or ecx,eax
// pixel #7
// xxxxxxxxxxxxxxxxxxxxxxxxIIIAxxxx
// xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx
//Alpha
mov eax,edx
shl eax,16
and eax,0x00100000
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
mov eax,edx
shl eax,12 // Intensity
and eax,0x000E0000
or ecx,eax
shr eax,3
and eax,0x00010000
or ecx,eax
// pixel #8
// xxxxxxxxxxxxxxxxxxxxxxxxxxxxIIIA
// AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx
mov eax,edx
shl eax,28 //Alpha
and eax,0x10000000
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
shl eax,1
or ecx,eax
mov eax,edx
shl eax,24 // Intensity
and eax,0x0E000000
or ecx,eax
shr eax,3
and eax,0x01000000
or ecx,eax
mov [edi],ecx
add edi,4
// }
// *
pop ecx
dec ecx
jnz x_loop_2
add esi,[line]
add edi,[ext]
pop ecx
dec ecx
jnz y_loop
end_y_loop:
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret
}
}
extern "C" void __declspec(naked) asmLoad4bI (wxUIntPtr src, int dst, wxUIntPtr wid_64, int height, int line, int ext)
{
_asm {
push ebp
mov ebp, esp
push ebx
push esi
push edi
mov esi,[src]
mov edi,[dst]
mov ecx,[height]
y_loop:
push ecx
mov ecx,[wid_64]
x_loop:
push ecx
mov eax,[esi] // read all 8 pixels
bswap eax
add esi,4
mov edx,eax
// 1st dword {
xor ecx,ecx
shr eax,28 // 0xF0000000 -> 0x0000000F
or ecx,eax
shl eax,4
or ecx,eax
mov eax,edx // 0x0F000000 -> 0x00000F00
shr eax,16
and eax,0x00000F00
or ecx,eax
shl eax,4
or ecx,eax
mov eax,edx
shr eax,4 // 0x00F00000 -> 0x000F0000
and eax,0x000F0000
or ecx,eax
shl eax,4
or ecx,eax
mov eax,edx
shl eax,8 // 0x000F0000 -> 0x0F000000
and eax,0x0F000000
or ecx,eax
shl eax,4
or ecx,eax
mov [edi],ecx
add edi,4
// }
// 2nd dword {
xor ecx,ecx
mov eax,edx
shr eax,12 // 0x0000F000 -> 0x0000000F
and eax,0x0000000F
or ecx,eax
shl eax,4
or ecx,eax
mov eax,edx // 0x00000F00 -> 0x00000F00
and eax,0x00000F00
or ecx,eax
shl eax,4
or ecx,eax
mov eax,edx
shl eax,12 // 0x000000F0 -> 0x000F0000
and eax,0x000F0000
or ecx,eax
shl eax,4
or ecx,eax
shl edx,24 // 0x0000000F -> 0x0F000000
and edx,0x0F000000
or ecx,edx
shl edx,4
or ecx,edx
mov [edi],ecx
add edi,4
// }
// * copy
mov eax,[esi] // read all 8 pixels
bswap eax
add esi,4
mov edx,eax
// 1st dword {
xor ecx,ecx
shr eax,28 // 0xF0000000 -> 0x0000000F
or ecx,eax
shl eax,4
or ecx,eax
mov eax,edx // 0x0F000000 -> 0x00000F00
shr eax,16
and eax,0x00000F00
or ecx,eax
shl eax,4
or ecx,eax
mov eax,edx
shr eax,4 // 0x00F00000 -> 0x000F0000
and eax,0x000F0000
or ecx,eax
shl eax,4
or ecx,eax
mov eax,edx
shl eax,8 // 0x000F0000 -> 0x0F000000
and eax,0x0F000000
or ecx,eax
shl eax,4
or ecx,eax
mov [edi],ecx
add edi,4
// }
// 2nd dword {
xor ecx,ecx
mov eax,edx
shr eax,12 // 0x0000F000 -> 0x0000000F
and eax,0x0000000F
or ecx,eax
shl eax,4
or ecx,eax
mov eax,edx // 0x00000F00 -> 0x00000F00
and eax,0x00000F00
or ecx,eax
shl eax,4
or ecx,eax
mov eax,edx
shl eax,12 // 0x000000F0 -> 0x000F0000
and eax,0x000F0000
or ecx,eax
shl eax,4
or ecx,eax
shl edx,24 // 0x0000000F -> 0x0F000000
and edx,0x0F000000
or ecx,edx
shl edx,4
or ecx,edx
mov [edi],ecx
add edi,4
// }
// *
pop ecx
dec ecx
jnz x_loop
pop ecx
dec ecx
jz near end_y_loop
push ecx
add esi,[line]
add edi,[ext]
mov ecx,[wid_64]
x_loop_2:
push ecx
mov eax,[esi+4] // read all 8 pixels
bswap eax
mov edx,eax
// 1st dword {
xor ecx,ecx
shr eax,28 // 0xF0000000 -> 0x0000000F
or ecx,eax
shl eax,4
or ecx,eax
mov eax,edx // 0x0F000000 -> 0x00000F00
shr eax,16
and eax,0x00000F00
or ecx,eax
shl eax,4
or ecx,eax
mov eax,edx
shr eax,4 // 0x00F00000 -> 0x000F0000
and eax,0x000F0000
or ecx,eax
shl eax,4
or ecx,eax
mov eax,edx
shl eax,8 // 0x000F0000 -> 0x0F000000
and eax,0x0F000000
or ecx,eax
shl eax,4
or ecx,eax
mov [edi],ecx
add edi,4
// }
// 2nd dword {
xor ecx,ecx
mov eax,edx
shr eax,12 // 0x0000F000 -> 0x0000000F
and eax,0x0000000F
or ecx,eax
shl eax,4
or ecx,eax
mov eax,edx // 0x00000F00 -> 0x00000F00
and eax,0x00000F00
or ecx,eax
shl eax,4
or ecx,eax
mov eax,edx
shl eax,12 // 0x000000F0 -> 0x000F0000
and eax,0x000F0000
or ecx,eax
shl eax,4
or ecx,eax
shl edx,24 // 0x0000000F -> 0x0F000000
and edx,0x0F000000
or ecx,edx
shl edx,4
or ecx,edx
mov [edi],ecx
add edi,4
// }
// * copy
mov eax,[esi] // read all 8 pixels
bswap eax
add esi,8
mov edx,eax
// 1st dword {
xor ecx,ecx
shr eax,28 // 0xF0000000 -> 0x0000000F
or ecx,eax
shl eax,4
or ecx,eax
mov eax,edx // 0x0F000000 -> 0x00000F00
shr eax,16
and eax,0x00000F00
or ecx,eax
shl eax,4
or ecx,eax
mov eax,edx
shr eax,4 // 0x00F00000 -> 0x000F0000
and eax,0x000F0000
or ecx,eax
shl eax,4
or ecx,eax
mov eax,edx
shl eax,8 // 0x000F0000 -> 0x0F000000
and eax,0x0F000000
or ecx,eax
shl eax,4
or ecx,eax
mov [edi],ecx
add edi,4
// }
// 2nd dword {
xor ecx,ecx
mov eax,edx
shr eax,12 // 0x0000F000 -> 0x0000000F
and eax,0x0000000F
or ecx,eax
shl eax,4
or ecx,eax
mov eax,edx // 0x00000F00 -> 0x00000F00
and eax,0x00000F00
or ecx,eax
shl eax,4
or ecx,eax
mov eax,edx
shl eax,12 // 0x000000F0 -> 0x000F0000
and eax,0x000F0000
or ecx,eax
shl eax,4
or ecx,eax
shl edx,24 // 0x0000000F -> 0x0F000000
and edx,0x0F000000
or ecx,edx
shl edx,4
or ecx,edx
mov [edi],ecx
add edi,4
// }
// *
pop ecx
dec ecx
jnz x_loop_2
add esi,[line]
add edi,[ext]
pop ecx
dec ecx
jnz y_loop
end_y_loop:
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret
}
}
//****************************************************************
// Size: 0, Format: 2
// Loads a 4-bit colour-indexed texture, choosing the converter from rdp.tlut_mode:
//   0     - no palette lookup; texels are expanded as plain 4-bit intensity (8-bit output)
//   2     - palette lookup through asmLoad4bCI (16-bit output, reported as ARGB1555)
//   other - palette lookup through asmLoad4bIAPal (16-bit output, reported as AI88)
// wid_64 and height are clamped to at least 1. 'real_width' is taken to be the
// destination row width in texels (each wid_64 unit produces 16 texels).
// Returns the Glide texture format, with 1 << 16 OR-ed in for the 16-bit formats.
wxUint32 Load4bCI (wxUIntPtr dst, wxUIntPtr src, int wid_64, int height, int line, int real_width, int tile)
{
if (wid_64 < 1) wid_64 = 1;
if (height < 1) height = 1;
// Destination padding per row, in texels; scaled to bytes per output format below.
int ext = real_width - (wid_64 << 4);
if (rdp.tlut_mode == 0)
{
//in tlut DISABLE mode load CI texture as plain intensity texture instead of palette dereference.
//Thanks to angrylion for the advice
// asmLoad4bI writes one byte per texel, so the padding must not be doubled here
// (it previously was, which skewed every row after the first).
asmLoad4bI (src, dst, wid_64, height, line, ext);
return /*(0 << 16) | */GR_TEXFMT_ALPHA_INTENSITY_44;
}
// Both palettised converters write two bytes per texel.
ext <<= 1;
wxUIntPtr pal = wxPtrToUInt(rdp.pal_8 + (rdp.tiles[tile].palette << 4));
if (rdp.tlut_mode == 2)
{
asmLoad4bCI (src, dst, wid_64, height, line, ext, pal);
return (1 << 16) | GR_TEXFMT_ARGB_1555;
}
asmLoad4bIAPal (src, dst, wid_64, height, line, ext, pal);
return (1 << 16) | GR_TEXFMT_ALPHA_INTENSITY_88;
}
//****************************************************************
// Size: 0, Format: 3
//
// ** BY GUGAMAN **
// Loads a 4-bit intensity/alpha texture as 8-bit AI44.
// When a TLUT mode is active the request is forwarded, with the untouched
// arguments, to Load4bCI. Otherwise wid_64 and height are clamped to at least
// 1 and the rows are expanded by asmLoad4bIA.
wxUint32 Load4bIA (wxUIntPtr dst, wxUIntPtr src, int wid_64, int height, int line, int real_width, int tile)
{
if (rdp.tlut_mode == 0)
{
const int groups = (wid_64 < 1) ? 1 : wid_64;
const int rows = (height < 1) ? 1 : height;
// one output byte per texel, 16 texels per group
const int padding = real_width - (groups << 4);
asmLoad4bIA (src, dst, groups, rows, line, padding);
return /*(0 << 16) | */GR_TEXFMT_ALPHA_INTENSITY_44;
}
return Load4bCI (dst, src, wid_64, height, line, real_width, tile);
}
//****************************************************************
// Size: 0, Format: 4
// Loads a 4-bit intensity texture as 8-bit AI44.
// Any non-zero TLUT mode hands the original arguments to Load4bCI; otherwise
// the dimensions are clamped to at least 1 and asmLoad4bI does the expansion.
wxUint32 Load4bI (wxUIntPtr dst, wxUIntPtr src, int wid_64, int height, int line, int real_width, int tile)
{
const bool use_palette = (rdp.tlut_mode != 0);
if (use_palette)
{
return Load4bCI (dst, src, wid_64, height, line, real_width, tile);
}
if (wid_64 <= 0)
{
wid_64 = 1;
}
if (height <= 0)
{
height = 1;
}
// bytes to skip at the end of each output row (one byte per texel)
int row_padding = wid_64 << 4;
row_padding = real_width - row_padding;
asmLoad4bI (src, dst, wid_64, height, line, row_padding);
return /*(0 << 16) | */GR_TEXFMT_ALPHA_INTENSITY_44;
}
//****************************************************************
// Size: 0, Format: 0
// Picks the 4-bit loader from the current TLUT mode: plain intensity when the
// TLUT is disabled (rdp.tlut_mode == 0), colour-indexed otherwise.
wxUint32 Load4bSelect (wxUIntPtr dst, wxUIntPtr src, int wid_64, int height, int line, int real_width, int tile)
{
return (rdp.tlut_mode == 0)
? Load4bI (dst, src, wid_64, height, line, real_width, tile)
: Load4bCI (dst, src, wid_64, height, line, real_width, tile);
}