/* * Glide64 - Glide video plugin for Nintendo 64 emulators. * Copyright (c) 2002 Dave2001 * Copyright (c) 2008 Günther * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public * License along with this program; if not, write to the Free * Software Foundation, Inc., 51 Franklin Street, Fifth Floor, * Boston, MA 02110-1301, USA */ //**************************************************************** // // Glide64 - Glide Plugin for Nintendo 64 emulators (tested mostly with Project64) // Project started on December 29th, 2001 // // To modify Glide64: // * Write your name and (optional)email, commented by your work, so I know who did it, and so that you can find which parts you modified when it comes time to send it to me. // * Do NOT send me the whole project or file that you modified. Take out your modified code sections, and tell me where to put them. If people sent the whole thing, I would have many different versions, but no idea how to combine them all. // // Official Glide64 development channel: #Glide64 on EFnet // // Original author: Dave2001 (Dave2999@hotmail.com) // Other authors: Gonetz, Gugaman // //**************************************************************** //**************************************************************** // Size: 0, Format: 2 DWORD Load4bCI (unsigned char * dst, unsigned char * src, int wid_64, int height, int line, int real_width, int tile) { if (wid_64 < 1) wid_64 = 1; if (height < 1) height = 1; int ext = (real_width - (wid_64 << 4)) << 1; unsigned short * pal = (rdp.pal_8 + (rdp.tiles[tile].palette << 4)); if (rdp.tlut_mode == 2) { #if !defined(__GNUC__) && !defined(NO_ASM) __asm { mov ebx,dword ptr [pal] mov esi,dword ptr [src] mov edi,dword ptr [dst] mov ecx,dword ptr [height] y_loop: push ecx mov ecx,dword ptr [wid_64] x_loop: push ecx mov eax,dword ptr [esi] // read all 8 pixels bswap eax add esi,4 mov edx,eax // 1st dword output { shr eax,23 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,1 shl ecx,16 mov eax,edx shr eax,27 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,1 mov dword ptr [edi],ecx add edi,4 // } // 2nd dword output { mov eax,edx shr eax,15 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,1 shl ecx,16 mov eax,edx shr eax,19 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,1 mov dword ptr [edi],ecx add edi,4 // } // 3rd dword output { mov eax,edx shr eax,7 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,1 shl ecx,16 mov eax,edx shr eax,11 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,1 mov dword ptr [edi],ecx add edi,4 // } // 4th dword output { mov eax,edx shl eax,1 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,1 shl ecx,16 shr edx,3 and edx,0x1E mov cx,word ptr [ebx+edx] ror cx,1 mov dword ptr [edi],ecx add edi,4 // } // * copy mov eax,dword ptr [esi] // read all 8 pixels bswap eax add esi,4 mov edx,eax // 1st dword output { shr eax,23 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,1 shl ecx,16 mov eax,edx shr eax,27 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,1 mov dword ptr [edi],ecx add edi,4 // } // 2nd dword output { mov eax,edx shr eax,15 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,1 shl ecx,16 mov eax,edx shr eax,19 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,1 mov dword ptr [edi],ecx add edi,4 // } // 3rd dword output { mov eax,edx shr eax,7 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,1 shl ecx,16 mov eax,edx shr eax,11 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,1 mov dword ptr [edi],ecx add edi,4 // } // 4th dword output { mov eax,edx shl eax,1 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,1 shl ecx,16 shr edx,3 and edx,0x1E mov cx,word ptr [ebx+edx] ror cx,1 mov dword ptr [edi],ecx add edi,4 // } // * pop ecx dec ecx jnz x_loop pop ecx dec ecx jz end_y_loop push ecx add esi,dword ptr [line] add edi,dword ptr [ext] mov ecx,dword ptr [wid_64] x_loop_2: push ecx mov eax,dword ptr [esi+4] // read all 8 pixels bswap eax mov edx,eax // 1st dword output { shr eax,23 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,1 shl ecx,16 mov eax,edx shr eax,27 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,1 mov dword ptr [edi],ecx add edi,4 // } // 2nd dword output { mov eax,edx shr eax,15 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,1 shl ecx,16 mov eax,edx shr eax,19 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,1 mov dword ptr [edi],ecx add edi,4 // } // 3rd dword output { mov eax,edx shr eax,7 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,1 shl ecx,16 mov eax,edx shr eax,11 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,1 mov dword ptr [edi],ecx add edi,4 // } // 4th dword output { mov eax,edx shl eax,1 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,1 shl ecx,16 shr edx,3 and edx,0x1E mov cx,word ptr [ebx+edx] ror cx,1 mov dword ptr [edi],ecx add edi,4 // } // * copy mov eax,dword ptr [esi] // read all 8 pixels bswap eax add esi,8 mov edx,eax // 1st dword output { shr eax,23 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,1 shl ecx,16 mov eax,edx shr eax,27 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,1 mov dword ptr [edi],ecx add edi,4 // } // 2nd dword output { mov eax,edx shr eax,15 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,1 shl ecx,16 mov eax,edx shr eax,19 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,1 mov dword ptr [edi],ecx add edi,4 // } // 3rd dword output { mov eax,edx shr eax,7 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,1 shl ecx,16 mov eax,edx shr eax,11 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,1 mov dword ptr [edi],ecx add edi,4 // } // 4th dword output { mov eax,edx shl eax,1 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,1 shl ecx,16 shr edx,3 and edx,0x1E mov cx,word ptr [ebx+edx] ror cx,1 mov dword ptr [edi],ecx add edi,4 // } // * pop ecx dec ecx jnz x_loop_2 add esi,dword ptr [line] add edi,dword ptr [ext] pop ecx dec ecx jnz y_loop end_y_loop: } #elif !defined(NO_ASM) //printf("Load4bCI1\n"); // This way, gcc generates either a 32 bit or a 64 bit register long lTempX, lTempY, lHeight = (long) height; intptr_t fake_eax, fake_edx; asm volatile ( "1: \n" // y_loop "mov %[c], %[tempy] \n" "mov %[wid_64], %%ecx \n" "2: \n" // x_loop "mov %[c], %[tempx] \n" "mov (%[src]), %%eax \n" // read all 8 pixels "bswap %%eax \n" "add $4, %[src] \n" "mov %%eax, %%edx \n" // 1st dword output { "shr $23, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $1, %%cx \n" "shl $16, %%ecx \n" "mov %%edx, %%eax \n" "shr $27, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $1, %%cx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // 2nd dword output { "mov %%edx, %%eax \n" "shr $15, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $1, %%cx \n" "shl $16, %%ecx \n" "mov %%edx, %%eax \n" "shr $19, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $1, %%cx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // 3rd dword output { "mov %%edx, %%eax \n" "shr $7,%%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]),%%cx \n" "ror $1,%%cx \n" "shl $16,%%ecx \n" "mov %%edx, %%eax \n" "shr $11, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $1, %%cx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // 4th dword output { "mov %%edx, %%eax \n" "shl $1, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $1, %%cx \n" "shl $16, %%ecx \n" "shr $3, %%edx \n" "and $0x1E, %%edx \n" "mov (%[pal],%[d]), %%cx \n" "ror $1, %%cx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // * copy "mov (%[src]), %%eax \n" // read all 8 pixels "bswap %%eax \n" "add $4, %[src] \n" "mov %%eax, %%edx \n" // 1st dword output { "shr $23, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $1, %%cx \n" "shl $16, %%ecx \n" "mov %%edx, %%eax \n" "shr $27, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $1, %%cx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // 2nd dword output { "mov %%edx, %%eax \n" "shr $15, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $1, %%cx \n" "shl $16, %%ecx \n" "mov %%edx, %%eax \n" "shr $19, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $1, %%cx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // 3rd dword output { "mov %%edx, %%eax \n" "shr $7, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $1, %%cx \n" "shl $16, %%ecx \n" "mov %%edx, %%eax \n" "shr $11, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $1, %%cx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // 4th dword output { "mov %%edx, %%eax \n" "shl $1, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $1, %%cx \n" "shl $16, %%ecx \n" "shr $3, %%edx \n" "and $0x1E, %%edx \n" "mov (%[pal],%[d]), %%cx \n" "ror $1, %%cx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // * "mov %[tempx], %[c] \n" "dec %%ecx \n" "jnz 2b \n" // x_loop "mov %[tempy], %[c] \n" "dec %%ecx \n" "jz 4f \n" // end_y_loop "mov %[c], %[tempy] \n" "add %[line], %[src] \n" "add %[ext], %[dst] \n" "mov %[wid_64], %%ecx \n" "3: \n" // x_loop_2 "mov %[c], %[tempx] \n" "mov 4(%[src]), %%eax \n" // read all 8 pixels "bswap %%eax \n" "mov %%eax, %%edx \n" // 1st dword output { "shr $23, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $1, %%cx \n" "shl $16, %%ecx \n" "mov %%edx, %%eax \n" "shr $27, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $1, %%cx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // 2nd dword output { "mov %%edx, %%eax \n" "shr $15, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $1, %%cx \n" "shl $16, %%ecx \n" "mov %%edx, %%eax \n" "shr $19, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $1, %%cx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // 3rd dword output { "mov %%edx, %%eax \n" "shr $7, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $1, %%cx \n" "shl $16, %%ecx \n" "mov %%edx, %%eax \n" "shr $11, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $1, %%cx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // 4th dword output { "mov %%edx, %%eax \n" "shl $1, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $1, %%cx \n" "shl $16, %%ecx \n" "shr $3, %%edx \n" "and $0x1E, %%edx \n" "mov (%[pal],%[d]), %%cx \n" "ror $1, %%cx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // * copy "mov (%[src]), %%eax \n" // read all 8 pixels "bswap %%eax \n" "add $8, %[src] \n" "mov %%eax, %%edx \n" // 1st dword output { "shr $23, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $1, %%cx \n" "shl $16, %%ecx \n" "mov %%edx, %%eax \n" "shr $27, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $1, %%cx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // 2nd dword output { "mov %%edx, %%eax \n" "shr $15, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $1, %%cx \n" "shl $16, %%ecx \n" "mov %%edx, %%eax \n" "shr $19, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $1, %%cx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // 3rd dword output { "mov %%edx, %%eax \n" "shr $7, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $1, %%cx \n" "shl $16, %%ecx \n" "mov %%edx, %%eax \n" "shr $11, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $1, %%cx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // 4th dword output { "mov %%edx, %%eax \n" "shl $1, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $1, %%cx \n" "shl $16, %%ecx \n" "shr $3, %%edx \n" "and $0x1E, %%edx \n" "mov (%[pal],%[d]), %%cx \n" "ror $1, %%cx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // * "mov %[tempx], %[c] \n" "dec %%ecx \n" "jnz 3b \n" // x_loop_2 "add %[line], %[src] \n" "add %[ext], %[dst] \n" "mov %[tempy], %[c] \n" "dec %%ecx \n" "jnz 1b \n" // y_loop "4: \n" // end_y_loop : [tempx]"=m"(lTempX), [tempy]"=m"(lTempY), [a] "=&a"(fake_eax), [d] "=&d"(fake_edx), [src] "+S"(src), [dst] "+D"(dst), [c] "+c"(lHeight) // pal needs to be in a register because its used in mov (%[pal],...), ... : [pal] "r" (pal), [wid_64] "g" (wid_64), [line] "g" ((uintptr_t)line), [ext] "g" ((uintptr_t)ext) : "memory", "cc" ); #endif } else { #if !defined(__GNUC__) && !defined(NO_ASM) __asm { mov ebx,dword ptr [pal] mov esi,dword ptr [src] mov edi,dword ptr [dst] mov ecx,dword ptr [height] ia_y_loop: push ecx mov ecx,dword ptr [wid_64] ia_x_loop: push ecx mov eax,dword ptr [esi] // read all 8 pixels bswap eax add esi,4 mov edx,eax // 1st dword output { shr eax,23 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,8 shl ecx,16 mov eax,edx shr eax,27 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,8 mov dword ptr [edi],ecx add edi,4 // } // 2nd dword output { mov eax,edx shr eax,15 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,8 shl ecx,16 mov eax,edx shr eax,19 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,8 mov dword ptr [edi],ecx add edi,4 // } // 3rd dword output { mov eax,edx shr eax,7 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,8 shl ecx,16 mov eax,edx shr eax,11 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,8 mov dword ptr [edi],ecx add edi,4 // } // 4th dword output { mov eax,edx shl eax,1 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,8 shl ecx,16 shr edx,3 and edx,0x1E mov cx,word ptr [ebx+edx] ror cx,8 mov dword ptr [edi],ecx add edi,4 // } // * copy mov eax,dword ptr [esi] // read all 8 pixels bswap eax add esi,4 mov edx,eax // 1st dword output { shr eax,23 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,8 shl ecx,16 mov eax,edx shr eax,27 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,8 mov dword ptr [edi],ecx add edi,4 // } // 2nd dword output { mov eax,edx shr eax,15 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,8 shl ecx,16 mov eax,edx shr eax,19 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,8 mov dword ptr [edi],ecx add edi,4 // } // 3rd dword output { mov eax,edx shr eax,7 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,8 shl ecx,16 mov eax,edx shr eax,11 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,8 mov dword ptr [edi],ecx add edi,4 // } // 4th dword output { mov eax,edx shl eax,1 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,8 shl ecx,16 shr edx,3 and edx,0x1E mov cx,word ptr [ebx+edx] ror cx,8 mov dword ptr [edi],ecx add edi,4 // } // * pop ecx dec ecx jnz ia_x_loop pop ecx dec ecx jz ia_end_y_loop push ecx add esi,dword ptr [line] add edi,dword ptr [ext] mov ecx,dword ptr [wid_64] ia_x_loop_2: push ecx mov eax,dword ptr [esi+4] // read all 8 pixels bswap eax mov edx,eax // 1st dword output { shr eax,23 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,8 shl ecx,16 mov eax,edx shr eax,27 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,8 mov dword ptr [edi],ecx add edi,4 // } // 2nd dword output { mov eax,edx shr eax,15 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,8 shl ecx,16 mov eax,edx shr eax,19 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,8 mov dword ptr [edi],ecx add edi,4 // } // 3rd dword output { mov eax,edx shr eax,7 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,8 shl ecx,16 mov eax,edx shr eax,11 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,8 mov dword ptr [edi],ecx add edi,4 // } // 4th dword output { mov eax,edx shl eax,1 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,8 shl ecx,16 shr edx,3 and edx,0x1E mov cx,word ptr [ebx+edx] ror cx,8 mov dword ptr [edi],ecx add edi,4 // } // * copy mov eax,dword ptr [esi] // read all 8 pixels bswap eax add esi,8 mov edx,eax // 1st dword output { shr eax,23 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,8 shl ecx,16 mov eax,edx shr eax,27 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,8 mov dword ptr [edi],ecx add edi,4 // } // 2nd dword output { mov eax,edx shr eax,15 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,8 shl ecx,16 mov eax,edx shr eax,19 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,8 mov dword ptr [edi],ecx add edi,4 // } // 3rd dword output { mov eax,edx shr eax,7 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,8 shl ecx,16 mov eax,edx shr eax,11 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,8 mov dword ptr [edi],ecx add edi,4 // } // 4th dword output { mov eax,edx shl eax,1 and eax,0x1E mov cx,word ptr [ebx+eax] ror cx,8 shl ecx,16 shr edx,3 and edx,0x1E mov cx,word ptr [ebx+edx] ror cx,8 mov dword ptr [edi],ecx add edi,4 // } // * pop ecx dec ecx jnz ia_x_loop_2 add esi,dword ptr [line] add edi,dword ptr [ext] pop ecx dec ecx jnz ia_y_loop ia_end_y_loop: } #elif !defined(NO_ASM) //printf("Load4bCI2\n"); long lTempX, lTempY, lHeight = (long) height; intptr_t fake_eax, fake_edx; asm volatile ( "1: \n" // ia_y_loop "mov %[c], %[tempy] \n" "mov %[wid_64], %%ecx \n" "2: \n" // ia_x_loop "mov %[c], %[tempx] \n" "mov (%[src]), %%eax \n" // read all 8 pixels "bswap %%eax \n" "add $4, %[src] \n" "mov %%eax, %%edx \n" // 1st dword output { "shr $23, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $8, %%cx \n" "shl $16, %%ecx \n" "mov %%edx, %%eax \n" "shr $27, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $8, %%cx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // 2nd dword output { "mov %%edx, %%eax \n" "shr $15, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $8, %%cx \n" "shl $16, %%ecx \n" "mov %%edx, %%eax \n" "shr $19, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $8, %%cx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // 3rd dword output { "mov %%edx, %%eax \n" "shr $7, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $8, %%cx \n" "shl $16, %%ecx \n" "mov %%edx, %%eax \n" "shr $11, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $8, %%cx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // 4th dword output { "mov %%edx, %%eax \n" "shl $1, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $8, %%cx \n" "shl $16, %%ecx \n" "shr $3, %%edx \n" "and $0x1E, %%edx \n" "mov (%[pal],%[d]), %%cx \n" "ror $8, %%cx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // * copy "mov (%[src]), %%eax \n" // read all 8 pixels "bswap %%eax \n" "add $4, %[src] \n" "mov %%eax, %%edx \n" // 1st dword output { "shr $23, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $8, %%cx \n" "shl $16, %%ecx \n" "mov %%edx, %%eax \n" "shr $27, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $8, %%cx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // 2nd dword output { "mov %%edx, %%eax \n" "shr $15, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $8, %%cx \n" "shl $16, %%ecx \n" "mov %%edx, %%eax \n" "shr $19, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $8, %%cx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // 3rd dword output { "mov %%edx, %%eax \n" "shr $7, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $8,%%cx \n" "shl $16, %%ecx \n" "mov %%edx, %%eax \n" "shr $11, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $8, %%cx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // 4th dword output { "mov %%edx, %%eax \n" "shl $1, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $8, %%cx \n" "shl $16, %%ecx \n" "shr $3, %%edx \n" "and $0x1E, %%edx \n" "mov (%[pal],%[d]), %%cx \n" "ror $8, %%cx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // * "mov %[tempx], %[c] \n" "dec %%ecx \n" "jnz 2b \n" // ia_x_loop "mov %[tempy], %[c] \n" "dec %%ecx \n" "jz 4f \n" // ia_end_y_loop "mov %[c], %[tempy] \n" "add %[line], %[src] \n" "add %[ext], %[dst] \n" "mov %[wid_64], %%ecx \n" "3: \n" // ia_x_loop_2 "mov %[c], %[tempx] \n" "mov 4(%[src]), %%eax \n" // read all 8 pixels "bswap %%eax \n" "mov %%eax, %%edx \n" // 1st dword output { "shr $23, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $8, %%cx \n" "shl $16, %%ecx \n" "mov %%edx, %%eax \n" "shr $27, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $8, %%cx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // 2nd dword output { "mov %%edx, %%eax \n" "shr $15, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $8, %%cx \n" "shl $16, %%ecx \n" "mov %%edx, %%eax \n" "shr $19, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $8, %%cx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // 3rd dword output { "mov %%edx, %%eax \n" "shr $7, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $8, %%cx \n" "shl $16, %%ecx \n" "mov %%edx, %%eax \n" "shr $11, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $8, %%cx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // 4th dword output { "mov %%edx, %%eax \n" "shl $1, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $8, %%cx \n" "shl $16, %%ecx \n" "shr $3, %%edx \n" "and $0x1E, %%edx \n" "mov (%[pal],%[d]), %%cx \n" "ror $8, %%cx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // * copy "mov (%[src]), %%eax \n" // read all 8 pixels "bswap %%eax \n" "add $8, %[src] \n" "mov %%eax, %%edx \n" // 1st dword output { "shr $23, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $8, %%cx \n" "shl $16, %%ecx \n" "mov %%edx, %%eax \n" "shr $27, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $8, %%cx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // 2nd dword output { "mov %%edx, %%eax \n" "shr $15, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $8, %%cx \n" "shl $16, %%ecx \n" "mov %%edx, %%eax \n" "shr $19, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $8, %%cx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // 3rd dword output { "mov %%edx, %%eax \n" "shr $7, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $8, %%cx \n" "shl $16, %%ecx \n" "mov %%edx, %%eax \n" "shr $11, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $8, %%cx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // 4th dword output { "mov %%edx, %%eax \n" "shl $1, %%eax \n" "and $0x1E, %%eax \n" "mov (%[pal],%[a]), %%cx \n" "ror $8, %%cx \n" "shl $16, %%ecx \n" "shr $3, %%edx \n" "and $0x1E, %%edx \n" "mov (%[pal],%[d]), %%cx \n" "ror $8, %%cx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // * "mov %[tempx], %[c] \n" "dec %%ecx \n" "jnz 3b \n" // ia_x_loop_2 "add %[line], %[src] \n" "add %[ext], %[dst] \n" "mov %[tempy], %[c] \n" "dec %%ecx \n" "jnz 1b \n" // ia_y_loop "4: \n" // ia_end_y_loop : [tempx]"=m"(lTempX), [tempy]"=m"(lTempY), [a] "=&a"(fake_eax), [d] "=&d"(fake_edx), [src] "+S"(src), [dst] "+D"(dst), [c] "+c"(lHeight) // pal needs to be in a register because its used in mov (%[pal],...), ... : [pal] "r" (pal), [wid_64] "g" (wid_64), [line] "g" ((uintptr_t)line), [ext] "g" ((uintptr_t)ext) : "memory", "cc" ); #endif return (1 << 16) | GR_TEXFMT_ALPHA_INTENSITY_88; } return (1 << 16) | GR_TEXFMT_ARGB_1555; } //**************************************************************** // Size: 0, Format: 3 // // ** BY GUGAMAN ** DWORD Load4bIA (unsigned char * dst, unsigned char * src, int wid_64, int height, int line, int real_width, int tile) { if (rdp.tlut_mode != 0) return Load4bCI (dst, src, wid_64, height, line, real_width, tile); if (wid_64 < 1) wid_64 = 1; if (height < 1) height = 1; int ext = (real_width - (wid_64 << 4)); #if !defined(__GNUC__) && !defined(NO_ASM) __asm { mov esi,dword ptr [src] mov edi,dword ptr [dst] mov ecx,dword ptr [height] y_loop: push ecx mov ecx,dword ptr [wid_64] x_loop: push ecx mov eax,dword ptr [esi] // read all 8 pixels bswap eax add esi,4 mov edx,eax // 1st dword { xor ecx,ecx // pixel #1 // IIIAxxxxxxxxxxxxxxxxxxxxxxxxxxxx // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII mov eax,edx shr eax,24 //Alpha and eax,0x00000010 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shr eax,28 // Intensity and eax,0x0000000E or ecx,eax shr eax,3 or ecx,eax // pixel #2 // xxxxIIIAxxxxxxxxxxxxxxxxxxxxxxxx // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx mov eax,edx shr eax,12 //Alpha and eax,0x00001000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shr eax,16 // Intensity and eax,0x00000E00 or ecx,eax shr eax,3 and eax,0x00000100 or ecx,eax // pixel #3 // xxxxxxxxIIIAxxxxxxxxxxxxxxxxxxxx // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx //Alpha mov eax,edx and eax,0x00100000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shr eax,4 // Intensity and eax,0x000E0000 or ecx,eax shr eax,3 and eax,0x00010000 or ecx,eax // pixel #4 // xxxxxxxxxxxxIIIAxxxxxxxxxxxxxxxx // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx mov eax,edx shl eax,12 //Alpha and eax,0x10000000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shl eax,8 // Intensity and eax,0x0E000000 or ecx,eax shr eax,3 and eax,0x01000000 or ecx,eax mov dword ptr [edi],ecx add edi,4 // } // 2nd dword { xor ecx,ecx // pixel #5 // xxxxxxxxxxxxxxxxIIIAxxxxxxxxxxxx // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII mov eax,edx shr eax,8 //Alpha and eax,0x00000010 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shr eax,12 // Intensity and eax,0x0000000E or ecx,eax shr eax,3 or ecx,eax // pixel #6 // xxxxxxxxxxxxxxxxxxxxIIIAxxxxxxxx // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx //Alpha mov eax,edx shl eax,4 and eax,0x00001000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx // Intensity and eax,0x00000E00 or ecx,eax shr eax,3 and eax,0x00000100 or ecx,eax // pixel #7 // xxxxxxxxxxxxxxxxxxxxxxxxIIIAxxxx // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx //Alpha mov eax,edx shl eax,16 and eax,0x00100000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shl eax,12 // Intensity and eax,0x000E0000 or ecx,eax shr eax,3 and eax,0x00010000 or ecx,eax // pixel #8 // xxxxxxxxxxxxxxxxxxxxxxxxxxxxIIIA // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx mov eax,edx shl eax,28 //Alpha and eax,0x10000000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shl eax,24 // Intensity and eax,0x0E000000 or ecx,eax shr eax,3 and eax,0x01000000 or ecx,eax mov dword ptr [edi],ecx add edi,4 // } // * copy mov eax,dword ptr [esi] // read all 8 pixels bswap eax add esi,4 mov edx,eax // 1st dword { xor ecx,ecx // pixel #1 // IIIAxxxxxxxxxxxxxxxxxxxxxxxxxxxx // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII mov eax,edx shr eax,24 //Alpha and eax,0x00000010 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shr eax,28 // Intensity and eax,0x0000000E or ecx,eax shr eax,3 or ecx,eax // pixel #2 // xxxxIIIAxxxxxxxxxxxxxxxxxxxxxxxx // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx mov eax,edx shr eax,12 //Alpha and eax,0x00001000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shr eax,16 // Intensity and eax,0x00000E00 or ecx,eax shr eax,3 and eax,0x00000100 or ecx,eax // pixel #3 // xxxxxxxxIIIAxxxxxxxxxxxxxxxxxxxx // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx //Alpha mov eax,edx and eax,0x00100000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shr eax,4 // Intensity and eax,0x000E0000 or ecx,eax shr eax,3 and eax,0x00010000 or ecx,eax // pixel #4 // xxxxxxxxxxxxIIIAxxxxxxxxxxxxxxxx // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx mov eax,edx shl eax,12 //Alpha and eax,0x10000000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shl eax,8 // Intensity and eax,0x0E000000 or ecx,eax shr eax,3 and eax,0x01000000 or ecx,eax mov dword ptr [edi],ecx add edi,4 // } // 2nd dword { xor ecx,ecx // pixel #5 // xxxxxxxxxxxxxxxxIIIAxxxxxxxxxxxx // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII mov eax,edx shr eax,8 //Alpha and eax,0x00000010 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shr eax,12 // Intensity and eax,0x0000000E or ecx,eax shr eax,3 or ecx,eax // pixel #6 // xxxxxxxxxxxxxxxxxxxxIIIAxxxxxxxx // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx //Alpha mov eax,edx shl eax,4 and eax,0x00001000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx // Intensity and eax,0x00000E00 or ecx,eax shr eax,3 and eax,0x00000100 or ecx,eax // pixel #7 // xxxxxxxxxxxxxxxxxxxxxxxxIIIAxxxx // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx //Alpha mov eax,edx shl eax,16 and eax,0x00100000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shl eax,12 // Intensity and eax,0x000E0000 or ecx,eax shr eax,3 and eax,0x00010000 or ecx,eax // pixel #8 // xxxxxxxxxxxxxxxxxxxxxxxxxxxxIIIA // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx mov eax,edx shl eax,28 //Alpha and eax,0x10000000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shl eax,24 // Intensity and eax,0x0E000000 or ecx,eax shr eax,3 and eax,0x01000000 or ecx,eax mov dword ptr [edi],ecx add edi,4 // } // * pop ecx dec ecx jnz x_loop pop ecx dec ecx jz end_y_loop push ecx add esi,dword ptr [line] add edi,dword ptr [ext] mov ecx,dword ptr [wid_64] x_loop_2: push ecx mov eax,dword ptr [esi+4] // read all 8 pixels bswap eax mov edx,eax // 1st dword { xor ecx,ecx // pixel #1 // IIIAxxxxxxxxxxxxxxxxxxxxxxxxxxxx // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII mov eax,edx shr eax,24 //Alpha and eax,0x00000010 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shr eax,28 // Intensity and eax,0x0000000E or ecx,eax shr eax,3 or ecx,eax // pixel #2 // xxxxIIIAxxxxxxxxxxxxxxxxxxxxxxxx // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx mov eax,edx shr eax,12 //Alpha and eax,0x00001000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shr eax,16 // Intensity and eax,0x00000E00 or ecx,eax shr eax,3 and eax,0x00000100 or ecx,eax // pixel #3 // xxxxxxxxIIIAxxxxxxxxxxxxxxxxxxxx // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx //Alpha mov eax,edx and eax,0x00100000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shr eax,4 // Intensity and eax,0x000E0000 or ecx,eax shr eax,3 and eax,0x00010000 or ecx,eax // pixel #4 // xxxxxxxxxxxxIIIAxxxxxxxxxxxxxxxx // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx mov eax,edx shl eax,12 //Alpha and eax,0x10000000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shl eax,8 // Intensity and eax,0x0E000000 or ecx,eax shr eax,3 and eax,0x01000000 or ecx,eax mov dword ptr [edi],ecx add edi,4 // } // 2nd dword { xor ecx,ecx // pixel #5 // xxxxxxxxxxxxxxxxIIIAxxxxxxxxxxxx // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII mov eax,edx shr eax,8 //Alpha and eax,0x00000010 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shr eax,12 // Intensity and eax,0x0000000E or ecx,eax shr eax,3 or ecx,eax // pixel #6 // xxxxxxxxxxxxxxxxxxxxIIIAxxxxxxxx // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx //Alpha mov eax,edx shl eax,4 and eax,0x00001000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx // Intensity and eax,0x00000E00 or ecx,eax shr eax,3 and eax,0x00000100 or ecx,eax // pixel #7 // xxxxxxxxxxxxxxxxxxxxxxxxIIIAxxxx // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx //Alpha mov eax,edx shl eax,16 and eax,0x00100000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shl eax,12 // Intensity and eax,0x000E0000 or ecx,eax shr eax,3 and eax,0x00010000 or ecx,eax // pixel #8 // xxxxxxxxxxxxxxxxxxxxxxxxxxxxIIIA // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx mov eax,edx shl eax,28 //Alpha and eax,0x10000000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shl eax,24 // Intensity and eax,0x0E000000 or ecx,eax shr eax,3 and eax,0x01000000 or ecx,eax mov dword ptr [edi],ecx add edi,4 // } // * copy mov eax,dword ptr [esi] // read all 8 pixels bswap eax add esi,8 mov edx,eax // 1st dword { xor ecx,ecx // pixel #1 // IIIAxxxxxxxxxxxxxxxxxxxxxxxxxxxx // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII mov eax,edx shr eax,24 //Alpha and eax,0x00000010 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shr eax,28 // Intensity and eax,0x0000000E or ecx,eax shr eax,3 or ecx,eax // pixel #2 // xxxxIIIAxxxxxxxxxxxxxxxxxxxxxxxx // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx mov eax,edx shr eax,12 //Alpha and eax,0x00001000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shr eax,16 // Intensity and eax,0x00000E00 or ecx,eax shr eax,3 and eax,0x00000100 or ecx,eax // pixel #3 // xxxxxxxxIIIAxxxxxxxxxxxxxxxxxxxx // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx //Alpha mov eax,edx and eax,0x00100000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shr eax,4 // Intensity and eax,0x000E0000 or ecx,eax shr eax,3 and eax,0x00010000 or ecx,eax // pixel #4 // xxxxxxxxxxxxIIIAxxxxxxxxxxxxxxxx // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx mov eax,edx shl eax,12 //Alpha and eax,0x10000000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shl eax,8 // Intensity and eax,0x0E000000 or ecx,eax shr eax,3 and eax,0x01000000 or ecx,eax mov dword ptr [edi],ecx add edi,4 // } // 2nd dword { xor ecx,ecx // pixel #5 // xxxxxxxxxxxxxxxxIIIAxxxxxxxxxxxx // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII mov eax,edx shr eax,8 //Alpha and eax,0x00000010 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shr eax,12 // Intensity and eax,0x0000000E or ecx,eax shr eax,3 or ecx,eax // pixel #6 // xxxxxxxxxxxxxxxxxxxxIIIAxxxxxxxx // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx //Alpha mov eax,edx shl eax,4 and eax,0x00001000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx // Intensity and eax,0x00000E00 or ecx,eax shr eax,3 and eax,0x00000100 or ecx,eax // pixel #7 // xxxxxxxxxxxxxxxxxxxxxxxxIIIAxxxx // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx //Alpha mov eax,edx shl eax,16 and eax,0x00100000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shl eax,12 // Intensity and eax,0x000E0000 or ecx,eax shr eax,3 and eax,0x00010000 or ecx,eax // pixel #8 // xxxxxxxxxxxxxxxxxxxxxxxxxxxxIIIA // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx mov eax,edx shl eax,28 //Alpha and eax,0x10000000 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax shl eax,1 or ecx,eax mov eax,edx shl eax,24 // Intensity and eax,0x0E000000 or ecx,eax shr eax,3 and eax,0x01000000 or ecx,eax mov dword ptr [edi],ecx add edi,4 // } // * pop ecx dec ecx jnz x_loop_2 add esi,dword ptr [line] add edi,dword ptr [ext] pop ecx dec ecx jnz y_loop end_y_loop: } #elif !defined(NO_ASM) //printf("Load4bIA\n"); long lTempX, lTempY, lHeight = (long) height; asm volatile ( "1: \n" // y_loop2 "mov %[c], %[tempy] \n" "mov %[wid_64], %%ecx \n" "2: \n" // x_loop2 "mov %[c], %[tempx] \n" "mov (%[src]), %%eax \n" // read all 8 pixels "bswap %%eax \n" "add $4, %[src] \n" "mov %%eax, %%edx \n" // 1st dword { "xor %%ecx, %%ecx \n" // pixel #1 // IIIAxxxxxxxxxxxxxxxxxxxxxxxxxxxx // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII "mov %%edx, %%eax \n" "shr $24, %%eax \n" //Alpha "and $0x00000010, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" "shr $28, %%eax \n" // Intensity "and $0x0000000E, %%eax \n" "or %%eax, %%ecx \n" "shr $3, %%eax \n" "or %%eax, %%ecx \n" // pixel #2 // xxxxIIIAxxxxxxxxxxxxxxxxxxxxxxxx // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx "mov %%edx, %%eax \n" "shr $12, %%eax \n" //Alpha "and $0x00001000, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" "shr $16, %%eax \n" // Intensity "and $0x00000E00, %%eax \n" "or %%eax, %%ecx \n" "shr $3, %%eax \n" "and $0x00000100, %%eax \n" "or %%eax, %%ecx \n" // pixel #3 // xxxxxxxxIIIAxxxxxxxxxxxxxxxxxxxx // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx //Alpha "mov %%edx, %%eax \n" "and $0x00100000, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" "shr $4, %%eax \n" // Intensity "and $0x000E0000, %%eax \n" "or %%eax, %%ecx \n" "shr $3, %%eax \n" "and $0x00010000, %%eax \n" "or %%eax, %%ecx \n" // pixel #4 // xxxxxxxxxxxxIIIAxxxxxxxxxxxxxxxx // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx "mov %%edx, %%eax \n" "shl $12, %%eax \n" //Alpha "and $0x10000000, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" "shl $8, %%eax \n" // Intensity "and $0x0E000000, %%eax \n" "or %%eax, %%ecx \n" "shr $3, %%eax \n" "and $0x01000000, %%eax \n" "or %%eax, %%ecx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // 2nd dword { "xor %%ecx, %%ecx \n" // pixel #5 // xxxxxxxxxxxxxxxxIIIAxxxxxxxxxxxx // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII "mov %%edx, %%eax \n" "shr $8, %%eax \n" //Alpha "and $0x00000010, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" "shr $12, %%eax \n" // Intensity "and $0x0000000E, %%eax \n" "or %%eax, %%ecx \n" "shr $3, %%eax \n" "or %%eax, %%ecx \n" // pixel #6 // xxxxxxxxxxxxxxxxxxxxIIIAxxxxxxxx // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx //Alpha "mov %%edx, %%eax \n" "shl $4, %%eax \n" "and $0x00001000, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" // Intensity "and $0x00000E00, %%eax \n" "or %%eax, %%ecx \n" "shr $3, %%eax \n" "and $0x00000100, %%eax \n" "or %%eax, %%ecx \n" // pixel #7 // xxxxxxxxxxxxxxxxxxxxxxxxIIIAxxxx // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx //Alpha "mov %%edx, %%eax \n" "shl $16, %%eax \n" "and $0x00100000, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" "shl $12, %%eax \n" // Intensity "and $0x000E0000, %%eax \n" "or %%eax, %%ecx \n" "shr $3, %%eax \n" "and $0x00010000, %%eax \n" "or %%eax, %%ecx \n" // pixel #8 // xxxxxxxxxxxxxxxxxxxxxxxxxxxxIIIA // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx "mov %%edx, %%eax \n" "shl $28, %%eax \n" //Alpha "and $0x10000000, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" "shl $24, %%eax \n" // Intensity "and $0x0E000000, %%eax \n" "or %%eax, %%ecx \n" "shr $3, %%eax \n" "and $0x01000000, %%eax \n" "or %%eax, %%ecx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // * copy "mov (%[src]), %%eax \n" // read all 8 pixels "bswap %%eax \n" "add $4, %[src] \n" "mov %%eax, %%edx \n" // 1st dword { "xor %%ecx, %%ecx \n" // pixel #1 // IIIAxxxxxxxxxxxxxxxxxxxxxxxxxxxx // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII "mov %%edx, %%eax \n" "shr $24, %%eax \n" //Alpha "and $0x00000010, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" "shr $28, %%eax \n" // Intensity "and $0x0000000E, %%eax \n" "or %%eax, %%ecx \n" "shr $3, %%eax \n" "or %%eax, %%ecx \n" // pixel #2 // xxxxIIIAxxxxxxxxxxxxxxxxxxxxxxxx // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx "mov %%edx, %%eax \n" "shr $12, %%eax \n" //Alpha "and $0x00001000, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" "shr $16, %%eax \n" // Intensity "and $0x00000E00, %%eax \n" "or %%eax, %%ecx \n" "shr $3, %%eax \n" "and $0x00000100, %%eax \n" "or %%eax, %%ecx \n" // pixel #3 // xxxxxxxxIIIAxxxxxxxxxxxxxxxxxxxx // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx //Alpha "mov %%edx, %%eax \n" "and $0x00100000, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" "shr $4, %%eax \n" // Intensity "and $0x000E0000, %%eax \n" "or %%eax, %%ecx \n" "shr $3, %%eax \n" "and $0x00010000, %%eax \n" "or %%eax, %%ecx \n" // pixel #4 // xxxxxxxxxxxxIIIAxxxxxxxxxxxxxxxx // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx "mov %%edx, %%eax \n" "shl $12, %%eax \n" //Alpha "and $0x10000000, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" "shl $8, %%eax \n" // Intensity "and $0x0E000000, %%eax \n" "or %%eax, %%ecx \n" "shr $3, %%eax \n" "and $0x01000000, %%eax \n" "or %%eax, %%ecx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // 2nd dword { "xor %%ecx, %%ecx \n" // pixel #5 // xxxxxxxxxxxxxxxxIIIAxxxxxxxxxxxx // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII "mov %%edx, %%eax \n" "shr $8, %%eax \n" //Alpha "and $0x00000010, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" "shr $12, %%eax \n" // Intensity "and $0x0000000E, %%eax \n" "or %%eax, %%ecx \n" "shr $3, %%eax \n" "or %%eax, %%ecx \n" // pixel #6 // xxxxxxxxxxxxxxxxxxxxIIIAxxxxxxxx // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx //Alpha "mov %%edx, %%eax \n" "shl $4, %%eax \n" "and $0x00001000, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" // Intensity "and $0x00000E00, %%eax \n" "or %%eax, %%ecx \n" "shr $3, %%eax \n" "and $0x00000100, %%eax \n" "or %%eax, %%ecx \n" // pixel #7 // xxxxxxxxxxxxxxxxxxxxxxxxIIIAxxxx // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx //Alpha "mov %%edx, %%eax \n" "shl $16, %%eax \n" "and $0x00100000, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" "shl $12, %%eax \n" // Intensity "and $0x000E0000, %%eax \n" "or %%eax, %%ecx \n" "shr $3, %%eax \n" "and $0x00010000, %%eax \n" "or %%eax, %%ecx \n" // pixel #8 // xxxxxxxxxxxxxxxxxxxxxxxxxxxxIIIA // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx "mov %%edx, %%eax \n" "shl $28, %%eax \n" //Alpha "and $0x10000000, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" "shl $24, %%eax \n" // Intensity "and $0x0E000000, %%eax \n" "or %%eax, %%ecx \n" "shr $3, %%eax \n" "and $0x01000000, %%eax \n" "or %%eax, %%ecx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // * "mov %[tempx], %[c] \n" "dec %%ecx \n" "jnz 2b \n" // x_loop2 "mov %[tempy], %[c] \n" "dec %%ecx \n" "jz 4f \n" // end_y_loop2 "mov %[c], %[tempy] \n" "add %[line], %[src] \n" "add %[ext], %[dst] \n" "mov %[wid_64], %%ecx \n" "3: \n" // x_loop_22 "mov %[c], %[tempx] \n" "mov 4(%[src]), %%eax \n" // read all 8 pixels "bswap %%eax \n" "mov %%eax, %%edx \n" // 1st dword { "xor %%ecx, %%ecx \n" // pixel #1 // IIIAxxxxxxxxxxxxxxxxxxxxxxxxxxxx // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII "mov %%edx, %%eax \n" "shr $24, %%eax \n" //Alpha "and $0x00000010, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" "shr $28, %%eax \n" // Intensity "and $0x0000000E, %%eax \n" "or %%eax, %%ecx \n" "shr $3, %%eax \n" "or %%eax, %%ecx \n" // pixel #2 // xxxxIIIAxxxxxxxxxxxxxxxxxxxxxxxx // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx "mov %%edx, %%eax \n" "shr $12, %%eax \n" //Alpha "and $0x00001000, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" "shr $16, %%eax \n" // Intensity "and $0x00000E00, %%eax \n" "or %%eax, %%ecx \n" "shr $3, %%eax \n" "and $0x00000100, %%eax \n" "or %%eax, %%ecx \n" // pixel #3 // xxxxxxxxIIIAxxxxxxxxxxxxxxxxxxxx // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx //Alpha "mov %%edx, %%eax \n" "and $0x00100000, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" "shr $4, %%eax \n" // Intensity "and $0x000E0000, %%eax \n" "or %%eax, %%ecx \n" "shr $3, %%eax \n" "and $0x00010000, %%eax \n" "or %%eax, %%ecx \n" // pixel #4 // xxxxxxxxxxxxIIIAxxxxxxxxxxxxxxxx // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx "mov %%edx, %%eax \n" "shl $12, %%eax \n" //Alpha "and $0x10000000, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" "shl $8, %%eax \n" // Intensity "and $0x0E000000, %%eax \n" "or %%eax, %%ecx \n" "shr $3, %%eax \n" "and $0x01000000, %%eax \n" "or %%eax, %%ecx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // 2nd dword { "xor %%ecx, %%ecx \n" // pixel #5 // xxxxxxxxxxxxxxxxIIIAxxxxxxxxxxxx // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII "mov %%edx, %%eax \n" "shr $8, %%eax \n" //Alpha "and $0x00000010, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" "shr $12, %%eax \n" // Intensity "and $0x0000000E, %%eax \n" "or %%eax, %%ecx \n" "shr $3, %%eax \n" "or %%eax, %%ecx \n" // pixel #6 // xxxxxxxxxxxxxxxxxxxxIIIAxxxxxxxx // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx //Alpha "mov %%edx, %%eax \n" "shl $4, %%eax \n" "and $0x00001000, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" // Intensity "and $0x00000E00, %%eax \n" "or %%eax, %%ecx \n" "shr $3, %%eax \n" "and $0x00000100, %%eax \n" "or %%eax, %%ecx \n" // pixel #7 // xxxxxxxxxxxxxxxxxxxxxxxxIIIAxxxx // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx //Alpha "mov %%edx, %%eax \n" "shl $16, %%eax \n" "and $0x00100000, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" "shl $12, %%eax \n" // Intensity "and $0x000E0000, %%eax \n" "or %%eax, %%ecx \n" "shr $3, %%eax \n" "and $0x00010000, %%eax \n" "or %%eax, %%ecx \n" // pixel #8 // xxxxxxxxxxxxxxxxxxxxxxxxxxxxIIIA // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx "mov %%edx, %%eax \n" "shl $28, %%eax \n" //Alpha "and $0x10000000, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" "shl $24, %%eax \n" // Intensity "and $0x0E000000, %%eax \n" "or %%eax, %%ecx \n" "shr $3, %%eax \n" "and $0x01000000, %%eax \n" "or %%eax, %%ecx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // * copy "mov (%[src]), %%eax \n" // read all 8 pixels "bswap %%eax \n" "add $8, %[src] \n" "mov %%eax, %%edx \n" // 1st dword { "xor %%ecx, %%ecx \n" // pixel #1 // IIIAxxxxxxxxxxxxxxxxxxxxxxxxxxxx // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII "mov %%edx, %%eax \n" "shr $24, %%eax \n" //Alpha "and $0x00000010, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" "shr $28, %%eax \n" // Intensity "and $0x0000000E, %%eax \n" "or %%eax, %%ecx \n" "shr $3, %%eax \n" "or %%eax, %%ecx \n" // pixel #2 // xxxxIIIAxxxxxxxxxxxxxxxxxxxxxxxx // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx "mov %%edx, %%eax \n" "shr $12, %%eax \n" //Alpha "and $0x00001000, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" "shr $16, %%eax \n" // Intensity "and $0x00000E00, %%eax \n" "or %%eax, %%ecx \n" "shr $3, %%eax \n" "and $0x00000100, %%eax \n" "or %%eax, %%ecx \n" // pixel #3 // xxxxxxxxIIIAxxxxxxxxxxxxxxxxxxxx // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx //Alpha "mov %%edx, %%eax \n" "and $0x00100000, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" "shr $4, %%eax \n" // Intensity "and $0x000E0000, %%eax \n" "or %%eax, %%ecx \n" "shr $3, %%eax \n" "and $0x00010000, %%eax \n" "or %%eax, %%ecx \n" // pixel #4 // xxxxxxxxxxxxIIIAxxxxxxxxxxxxxxxx // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx "mov %%edx, %%eax \n" "shl $12, %%eax \n" //Alpha "and $0x10000000, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" "shl $8, %%eax \n" // Intensity "and $0x0E000000, %%eax \n" "or %%eax, %%ecx \n" "shr $3, %%eax \n" "and $0x01000000, %%eax \n" "or %%eax, %%ecx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // 2nd dword { "xor %%ecx, %%ecx \n" // pixel #5 // xxxxxxxxxxxxxxxxIIIAxxxxxxxxxxxx // xxxxxxxxxxxxxxxxxxxxxxxxAAAAIIII "mov %%edx, %%eax \n" "shr $8, %%eax \n" //Alpha "and $0x00000010, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" "shr $12, %%eax \n" // Intensity "and $0x0000000E, %%eax \n" "or %%eax, %%ecx \n" "shr $3, %%eax \n" "or %%eax, %%ecx \n" // pixel #6 // xxxxxxxxxxxxxxxxxxxxIIIAxxxxxxxx // xxxxxxxxxxxxxxxxAAAAIIIIxxxxxxxx //Alpha "mov %%edx, %%eax \n" "shl $4, %%eax \n" "and $0x00001000, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" // Intensity "and $0x00000E00, %%eax \n" "or %%eax, %%ecx \n" "shr $3, %%eax \n" "and $0x00000100, %%eax \n" "or %%eax, %%ecx \n" // pixel #7 // xxxxxxxxxxxxxxxxxxxxxxxxIIIAxxxx // xxxxxxxxAAAAIIIIxxxxxxxxxxxxxxxx //Alpha "mov %%edx, %%eax \n" "shl $16, %%eax \n" "and $0x00100000, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" "shl $12, %%eax \n" // Intensity "and $0x000E0000, %%eax \n" "or %%eax, %%ecx \n" "shr $3, %%eax \n" "and $0x00010000, %%eax \n" "or %%eax, %%ecx \n" // pixel #8 // xxxxxxxxxxxxxxxxxxxxxxxxxxxxIIIA // AAAAIIIIxxxxxxxxxxxxxxxxxxxxxxxx "mov %%edx, %%eax \n" "shl $28, %%eax \n" //Alpha "and $0x10000000, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "shl $1, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" "shl $24, %%eax \n" // Intensity "and $0x0E000000, %%eax \n" "or %%eax, %%ecx \n" "shr $3, %%eax \n" "and $0x01000000, %%eax \n" "or %%eax, %%ecx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // * "mov %[tempx], %[c] \n" "dec %%ecx \n" "jnz 3b \n" // x_loop_22 "add %[line], %[src] \n" "add %[ext], %[dst] \n" "mov %[tempy], %[c] \n" "dec %%ecx \n" "jnz 1b \n" // y_loop2 "4: \n" // end_y_loop2 : [tempx]"=m"(lTempX), [tempy]"=m"(lTempY), [src]"+S"(src), [dst]"+D"(dst), [c]"+c"(lHeight) : [wid_64] "g" (wid_64), [line] "g" ((uintptr_t)line), [ext] "g" ((uintptr_t)ext) : "memory", "cc", "eax", "edx" ); #endif return /*(0 << 16) | */GR_TEXFMT_ALPHA_INTENSITY_44; } //**************************************************************** // Size: 0, Format: 4 DWORD Load4bI (unsigned char * dst, unsigned char * src, int wid_64, int height, int line, int real_width, int tile) { if (rdp.tlut_mode != 0) return Load4bCI (dst, src, wid_64, height, line, real_width, tile); if (wid_64 < 1) wid_64 = 1; if (height < 1) height = 1; int ext = (real_width - (wid_64 << 4)); #if !defined(__GNUC__) && !defined(NO_ASM) __asm { mov esi,dword ptr [src] mov edi,dword ptr [dst] mov ecx,dword ptr [height] y_loop: push ecx mov ecx,dword ptr [wid_64] x_loop: push ecx mov eax,dword ptr [esi] // read all 8 pixels bswap eax add esi,4 mov edx,eax // 1st dword { xor ecx,ecx shr eax,28 // 0xF0000000 -> 0x0000000F or ecx,eax shl eax,4 or ecx,eax mov eax,edx // 0x0F000000 -> 0x00000F00 shr eax,16 and eax,0x00000F00 or ecx,eax shl eax,4 or ecx,eax mov eax,edx shr eax,4 // 0x00F00000 -> 0x000F0000 and eax,0x000F0000 or ecx,eax shl eax,4 or ecx,eax mov eax,edx shl eax,8 // 0x000F0000 -> 0x0F000000 and eax,0x0F000000 or ecx,eax shl eax,4 or ecx,eax mov dword ptr [edi],ecx add edi,4 // } // 2nd dword { xor ecx,ecx mov eax,edx shr eax,12 // 0x0000F000 -> 0x0000000F and eax,0x0000000F or ecx,eax shl eax,4 or ecx,eax mov eax,edx // 0x00000F00 -> 0x00000F00 and eax,0x00000F00 or ecx,eax shl eax,4 or ecx,eax mov eax,edx shl eax,12 // 0x000000F0 -> 0x000F0000 and eax,0x000F0000 or ecx,eax shl eax,4 or ecx,eax shl edx,24 // 0x0000000F -> 0x0F000000 and edx,0x0F000000 or ecx,edx shl edx,4 or ecx,edx mov dword ptr [edi],ecx add edi,4 // } // * copy mov eax,dword ptr [esi] // read all 8 pixels bswap eax add esi,4 mov edx,eax // 1st dword { xor ecx,ecx shr eax,28 // 0xF0000000 -> 0x0000000F or ecx,eax shl eax,4 or ecx,eax mov eax,edx // 0x0F000000 -> 0x00000F00 shr eax,16 and eax,0x00000F00 or ecx,eax shl eax,4 or ecx,eax mov eax,edx shr eax,4 // 0x00F00000 -> 0x000F0000 and eax,0x000F0000 or ecx,eax shl eax,4 or ecx,eax mov eax,edx shl eax,8 // 0x000F0000 -> 0x0F000000 and eax,0x0F000000 or ecx,eax shl eax,4 or ecx,eax mov dword ptr [edi],ecx add edi,4 // } // 2nd dword { xor ecx,ecx mov eax,edx shr eax,12 // 0x0000F000 -> 0x0000000F and eax,0x0000000F or ecx,eax shl eax,4 or ecx,eax mov eax,edx // 0x00000F00 -> 0x00000F00 and eax,0x00000F00 or ecx,eax shl eax,4 or ecx,eax mov eax,edx shl eax,12 // 0x000000F0 -> 0x000F0000 and eax,0x000F0000 or ecx,eax shl eax,4 or ecx,eax shl edx,24 // 0x0000000F -> 0x0F000000 and edx,0x0F000000 or ecx,edx shl edx,4 or ecx,edx mov dword ptr [edi],ecx add edi,4 // } // * pop ecx dec ecx jnz x_loop pop ecx dec ecx jz end_y_loop push ecx add esi,dword ptr [line] add edi,dword ptr [ext] mov ecx,dword ptr [wid_64] x_loop_2: push ecx mov eax,dword ptr [esi+4] // read all 8 pixels bswap eax mov edx,eax // 1st dword { xor ecx,ecx shr eax,28 // 0xF0000000 -> 0x0000000F or ecx,eax shl eax,4 or ecx,eax mov eax,edx // 0x0F000000 -> 0x00000F00 shr eax,16 and eax,0x00000F00 or ecx,eax shl eax,4 or ecx,eax mov eax,edx shr eax,4 // 0x00F00000 -> 0x000F0000 and eax,0x000F0000 or ecx,eax shl eax,4 or ecx,eax mov eax,edx shl eax,8 // 0x000F0000 -> 0x0F000000 and eax,0x0F000000 or ecx,eax shl eax,4 or ecx,eax mov dword ptr [edi],ecx add edi,4 // } // 2nd dword { xor ecx,ecx mov eax,edx shr eax,12 // 0x0000F000 -> 0x0000000F and eax,0x0000000F or ecx,eax shl eax,4 or ecx,eax mov eax,edx // 0x00000F00 -> 0x00000F00 and eax,0x00000F00 or ecx,eax shl eax,4 or ecx,eax mov eax,edx shl eax,12 // 0x000000F0 -> 0x000F0000 and eax,0x000F0000 or ecx,eax shl eax,4 or ecx,eax shl edx,24 // 0x0000000F -> 0x0F000000 and edx,0x0F000000 or ecx,edx shl edx,4 or ecx,edx mov dword ptr [edi],ecx add edi,4 // } // * copy mov eax,dword ptr [esi] // read all 8 pixels bswap eax add esi,8 mov edx,eax // 1st dword { xor ecx,ecx shr eax,28 // 0xF0000000 -> 0x0000000F or ecx,eax shl eax,4 or ecx,eax mov eax,edx // 0x0F000000 -> 0x00000F00 shr eax,16 and eax,0x00000F00 or ecx,eax shl eax,4 or ecx,eax mov eax,edx shr eax,4 // 0x00F00000 -> 0x000F0000 and eax,0x000F0000 or ecx,eax shl eax,4 or ecx,eax mov eax,edx shl eax,8 // 0x000F0000 -> 0x0F000000 and eax,0x0F000000 or ecx,eax shl eax,4 or ecx,eax mov dword ptr [edi],ecx add edi,4 // } // 2nd dword { xor ecx,ecx mov eax,edx shr eax,12 // 0x0000F000 -> 0x0000000F and eax,0x0000000F or ecx,eax shl eax,4 or ecx,eax mov eax,edx // 0x00000F00 -> 0x00000F00 and eax,0x00000F00 or ecx,eax shl eax,4 or ecx,eax mov eax,edx shl eax,12 // 0x000000F0 -> 0x000F0000 and eax,0x000F0000 or ecx,eax shl eax,4 or ecx,eax shl edx,24 // 0x0000000F -> 0x0F000000 and edx,0x0F000000 or ecx,edx shl edx,4 or ecx,edx mov dword ptr [edi],ecx add edi,4 // } // * pop ecx dec ecx jnz x_loop_2 add esi,dword ptr [line] add edi,dword ptr [ext] pop ecx dec ecx jnz y_loop end_y_loop: } #elif !defined(NO_ASM) //printf("Load4bI\n"); int lTempX, lTempY, lHeight = (int) height; asm volatile ( "1: \n" // y_loop3 "mov %[c], %[tempy] \n" "mov %[wid_64], %%ecx \n" "2: \n" // x_loop3 "mov %[c], %[tempx] \n" "mov (%[src]), %%eax \n" // read all 8 pixels "bswap %%eax \n" "add $4, %[src] \n" "mov %%eax, %%edx \n" // 1st dword { "xor %%ecx, %%ecx \n" "shr $28, %%eax \n" // 0xF0000000 -> 0x0000000F "or %%eax, %%ecx \n" "shl $4, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" // 0x0F000000 -> 0x00000F00 "shr $16, %%eax \n" "and $0x00000F00, %%eax \n" "or %%eax, %%ecx \n" "shl $4, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" "shr $4, %%eax \n" // 0x00F00000 -> 0x000F0000 "and $0x000F0000, %%eax \n" "or %%eax, %%ecx \n" "shl $4, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" "shl $8, %%eax \n" // 0x000F0000 -> 0x0F000000 "and $0x0F000000, %%eax \n" "or %%eax, %%ecx \n" "shl $4, %%eax \n" "or %%eax, %%ecx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // 2nd dword { "xor %%ecx, %%ecx \n" "mov %%edx, %%eax \n" "shr $12, %%eax \n" // 0x0000F000 -> 0x0000000F "and $0x0000000F, %%eax \n" "or %%eax, %%ecx \n" "shl $4, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" // 0x00000F00 -> 0x00000F00 "and $0x00000F00, %%eax \n" "or %%eax, %%ecx \n" "shl $4, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" "shl $12, %%eax \n" // 0x000000F0 -> 0x000F0000 "and $0x000F0000, %%eax \n" "or %%eax, %%ecx \n" "shl $4, %%eax \n" "or %%eax, %%ecx \n" "shl $24, %%edx \n" // 0x0000000F -> 0x0F000000 "and $0x0F000000, %%edx \n" "or %%edx, %%ecx \n" "shl $4, %%edx \n" "or %%edx, %%ecx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // * copy "mov (%[src]), %%eax \n" // read all 8 pixels "bswap %%eax \n" "add $4, %[src] \n" "mov %%eax, %%edx \n" // 1st dword { "xor %%ecx, %%ecx \n" "shr $28, %%eax \n" // 0xF0000000 -> 0x0000000F "or %%eax, %%ecx \n" "shl $4, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" // 0x0F000000 -> 0x00000F00 "shr $16, %%eax \n" "and $0x00000F00, %%eax \n" "or %%eax, %%ecx \n" "shl $4, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" "shr $4, %%eax \n" // 0x00F00000 -> 0x000F0000 "and $0x000F0000, %%eax \n" "or %%eax, %%ecx \n" "shl $4, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" "shl $8, %%eax \n" // 0x000F0000 -> 0x0F000000 "and $0x0F000000, %%eax \n" "or %%eax, %%ecx \n" "shl $4, %%eax \n" "or %%eax, %%ecx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // 2nd dword { "xor %%ecx, %%ecx \n" "mov %%edx, %%eax \n" "shr $12, %%eax \n" // 0x0000F000 -> 0x0000000F "and $0x0000000F, %%eax \n" "or %%eax, %%ecx \n" "shl $4, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" // 0x00000F00 -> 0x00000F00 "and $0x00000F00, %%eax \n" "or %%eax, %%ecx \n" "shl $4, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" "shl $12, %%eax \n" // 0x000000F0 -> 0x000F0000 "and $0x000F0000, %%eax \n" "or %%eax, %%ecx \n" "shl $4, %%eax \n" "or %%eax, %%ecx \n" "shl $24, %%edx \n" // 0x0000000F -> 0x0F000000 "and $0x0F000000, %%edx \n" "or %%edx, %%ecx \n" "shl $4, %%edx \n" "or %%edx, %%ecx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // * "mov %[tempx], %[c] \n" "dec %%ecx \n" "jnz 2b \n" // x_loop3 "mov %[tempy], %[c] \n" "dec %%ecx \n" "jz 4f \n" // end_y_loop3 "mov %[c], %[tempy] \n" "add %[line], %[src] \n" "add %[ext], %[dst] \n" "mov %[wid_64], %%ecx \n" "3: \n" // x_loop_23 "mov %[c], %[tempx] \n" "mov 4(%[src]), %%eax \n" // read all 8 pixels "bswap %%eax \n" "mov %%eax, %%edx \n" // 1st dword { "xor %%ecx, %%ecx \n" "shr $28, %%eax \n" // 0xF0000000 -> 0x0000000F "or %%eax, %%ecx \n" "shl $4, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" // 0x0F000000 -> 0x00000F00 "shr $16, %%eax \n" "and $0x00000F00, %%eax \n" "or %%eax, %%ecx \n" "shl $4, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" "shr $4, %%eax \n" // 0x00F00000 -> 0x000F0000 "and $0x000F0000, %%eax \n" "or %%eax, %%ecx \n" "shl $4, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" "shl $8, %%eax \n" // 0x000F0000 -> 0x0F000000 "and $0x0F000000, %%eax \n" "or %%eax, %%ecx \n" "shl $4, %%eax \n" "or %%eax, %%ecx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // 2nd dword { "xor %%ecx, %%ecx \n" "mov %%edx, %%eax \n" "shr $12, %%eax \n" // 0x0000F000 -> 0x0000000F "and $0x0000000F, %%eax \n" "or %%eax, %%ecx \n" "shl $4, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" // 0x00000F00 -> 0x00000F00 "and $0x00000F00, %%eax \n" "or %%eax, %%ecx \n" "shl $4, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" "shl $12, %%eax \n" // 0x000000F0 -> 0x000F0000 "and $0x000F0000, %%eax \n" "or %%eax, %%ecx \n" "shl $4, %%eax \n" "or %%eax, %%ecx \n" "shl $24, %%edx \n" // 0x0000000F -> 0x0F000000 "and $0x0F000000, %%edx \n" "or %%edx, %%ecx \n" "shl $4, %%edx \n" "or %%edx, %%ecx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // * copy "mov (%[src]), %%eax \n" // read all 8 pixels "bswap %%eax \n" "add $8, %[src] \n" "mov %%eax, %%edx \n" // 1st dword { "xor %%ecx, %%ecx \n" "shr $28, %%eax \n" // 0xF0000000 -> 0x0000000F "or %%eax, %%ecx \n" "shl $4, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" // 0x0F000000 -> 0x00000F00 "shr $16, %%eax \n" "and $0x00000F00, %%eax \n" "or %%eax, %%ecx \n" "shl $4, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" "shr $4, %%eax \n" // 0x00F00000 -> 0x000F0000 "and $0x000F0000, %%eax \n" "or %%eax, %%ecx \n" "shl $4, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" "shl $8, %%eax \n" // 0x000F0000 -> 0x0F000000 "and $0x0F000000, %%eax \n" "or %%eax, %%ecx \n" "shl $4, %%eax \n" "or %%eax, %%ecx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // 2nd dword { "xor %%ecx, %%ecx \n" "mov %%edx, %%eax \n" "shr $12, %%eax \n" // 0x0000F000 -> 0x0000000F "and $0x0000000F, %%eax \n" "or %%eax, %%ecx \n" "shl $4, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" // 0x00000F00 -> 0x00000F00 "and $0x00000F00, %%eax \n" "or %%eax, %%ecx \n" "shl $4, %%eax \n" "or %%eax, %%ecx \n" "mov %%edx, %%eax \n" "shl $12, %%eax \n" // 0x000000F0 -> 0x000F0000 "and $0x000F0000, %%eax \n" "or %%eax, %%ecx \n" "shl $4, %%eax \n" "or %%eax, %%ecx \n" "shl $24, %%edx \n" // 0x0000000F -> 0x0F000000 "and $0x0F000000, %%edx \n" "or %%edx, %%ecx \n" "shl $4, %%edx \n" "or %%edx, %%ecx \n" "mov %%ecx, (%[dst]) \n" "add $4, %[dst] \n" // } // * "mov %[tempx], %[c] \n" "dec %%ecx \n" "jnz 3b \n" // x_loop_23 "add %[line], %[src] \n" "add %[ext], %[dst] \n" "mov %[tempy], %[c] \n" "dec %%ecx \n" "jnz 1b \n" // y_loop3 "4: \n" // end_y_loop3 : [tempx]"=m"(lTempX), [tempy]"=m"(lTempY), [src] "+S"(src), [dst] "+D"(dst), [c]"+c"(lHeight) : [wid_64] "g" (wid_64), [line] "g" ((uintptr_t)line), [ext] "g" ((uintptr_t)ext) : "memory", "cc", "eax", "edx" ); #endif return /*(0 << 16) | */GR_TEXFMT_ALPHA_INTENSITY_44; } //**************************************************************** // Size: 0, Format: 0 DWORD Load4bSelect (unsigned char * dst, unsigned char * src, int wid_64, int height, int line, int real_width, int tile) { if (rdp.tlut_mode == 0) return Load4bI (dst, src, wid_64, height, line, real_width, tile); return Load4bCI (dst, src, wid_64, height, line, real_width, tile); }