Glide64: Redo the asm for textures

This commit is contained in:
zilmar 2013-04-17 20:32:38 +10:00
parent e928a5995d
commit 4cbd5c538d
12 changed files with 3879 additions and 3834 deletions

View File

@ -25,140 +25,53 @@
#include <stdlib.h> #include <stdlib.h>
#include "Ext_TxFilter.h" #include "Ext_TxFilter.h"
typedef boolean (*txfilter_init)(int maxwidth, int maxheight, int maxbpp, extern "C" boolean txfilter_init(int maxwidth, int maxheight, int maxbpp, int options, int cachesize, wchar_t *path, wchar_t *ident, dispInfoFuncExt callback);
int options, int cachesize, extern "C" void txfilter_shutdown(void);
wchar_t *path, wchar_t *ident, extern "C" boolean txfilter(unsigned char *src, int srcwidth, int srcheight, unsigned short srcformat, uint64 g64crc, GHQTexInfo *info);
dispInfoFuncExt callback); extern "C" boolean txfilter_hirestex(uint64 g64crc, uint64 r_crc64, unsigned short *palette, GHQTexInfo *info);
extern "C" uint64 txfilter_checksum(unsigned char *src, int width, int height, int size, int rowStride, unsigned char *palette);
typedef void (*txfilter_shutdown)(void); extern "C" boolean txfilter_dmptx(unsigned char *src, int width, int height, int rowStridePixel, unsigned short gfmt, unsigned short n64fmt, uint64 r_crc64);
extern "C" boolean txfilter_reloadhirestex();
typedef boolean (*txfilter_filter)(unsigned char *src, int srcwidth, int srcheight, unsigned short srcformat,
uint64 g64crc, GHQTexInfo *info);
typedef boolean (*txfilter_hirestex)(uint64 g64crc, uint64 r_crc64, unsigned short *palette, GHQTexInfo *info);
typedef uint64 (*txfilter_checksum)(unsigned char *src, int width, int height, int size, int rowStride, unsigned char *palette);
typedef boolean (*txfilter_dmptx)(unsigned char *src, int width, int height, int rowStridePixel, unsigned short gfmt, unsigned short n64fmt, uint64 r_crc64);
typedef boolean (*txfilter_reloadhirestex)();
static struct {
TXHMODULE lib;
txfilter_init init;
txfilter_shutdown shutdown;
txfilter_filter filter;
txfilter_hirestex hirestex;
txfilter_checksum checksum;
txfilter_dmptx dmptx;
txfilter_reloadhirestex reloadhirestex;
} txfilter;
void ext_ghq_shutdown(void) void ext_ghq_shutdown(void)
{ {
if (txfilter.shutdown) txfilter_shutdown();
(*txfilter.shutdown)();
if (txfilter.lib) {
DLCLOSE(txfilter.lib);
memset(&txfilter, 0, sizeof(txfilter));
}
} }
boolean ext_ghq_init(int maxwidth, int maxheight, int maxbpp, int options, int cachesize, boolean ext_ghq_init(int maxwidth, int maxheight, int maxbpp, int options, int cachesize,
wchar_t *path, wchar_t *ident, wchar_t *path, wchar_t *ident,
dispInfoFuncExt callback) dispInfoFuncExt callback)
{ {
boolean bRet = 0; return txfilter_init(maxwidth, maxheight, maxbpp, options, cachesize, path, ident, callback);
if (!txfilter.lib) {
wchar_t curpath[MAX_PATH];
wcscpy(curpath, path);
#ifdef WIN32
wcscat(curpath, L"\\GlideHQ.dll");
txfilter.lib = DLOPEN(curpath);
#else
char cbuf[MAX_PATH];
wcscat(curpath, L"/GlideHQ.so");
wcstombs(cbuf, curpath, MAX_PATH);
txfilter.lib = DLOPEN(cbuf);
#endif
}
if (txfilter.lib) {
if (!txfilter.init)
txfilter.init = (txfilter_init)DLSYM(txfilter.lib, "txfilter_init");
if (!txfilter.shutdown)
txfilter.shutdown = (txfilter_shutdown)DLSYM(txfilter.lib, "txfilter_shutdown");
if (!txfilter.filter)
txfilter.filter = (txfilter_filter)DLSYM(txfilter.lib, "txfilter");
if (!txfilter.hirestex)
txfilter.hirestex = (txfilter_hirestex)DLSYM(txfilter.lib, "txfilter_hirestex");
if (!txfilter.checksum)
txfilter.checksum = (txfilter_checksum)DLSYM(txfilter.lib, "txfilter_checksum");
if (!txfilter.dmptx)
txfilter.dmptx = (txfilter_dmptx)DLSYM(txfilter.lib, "txfilter_dmptx");
if (!txfilter.reloadhirestex)
txfilter.reloadhirestex = (txfilter_reloadhirestex)DLSYM(txfilter.lib, "txfilter_reloadhirestex");
}
if (txfilter.init && txfilter.shutdown && txfilter.filter &&
txfilter.hirestex && txfilter.checksum /*&& txfilter.dmptx && txfilter.reloadhirestex */)
bRet = (*txfilter.init)(maxwidth, maxheight, maxbpp, options, cachesize, path, ident, callback);
else
ext_ghq_shutdown();
return bRet;
} }
boolean ext_ghq_txfilter(unsigned char *src, int srcwidth, int srcheight, unsigned short srcformat, boolean ext_ghq_txfilter(unsigned char *src, int srcwidth, int srcheight, unsigned short srcformat,
uint64 g64crc, GHQTexInfo *info) uint64 g64crc, GHQTexInfo *info)
{ {
boolean ret = 0; return txfilter(src, srcwidth, srcheight, srcformat, g64crc, info);;
if (txfilter.filter)
ret = (*txfilter.filter)(src, srcwidth, srcheight, srcformat,
g64crc, info);
return ret;
} }
boolean ext_ghq_hirestex(uint64 g64crc, uint64 r_crc64, unsigned short *palette, GHQTexInfo *info) boolean ext_ghq_hirestex(uint64 g64crc, uint64 r_crc64, unsigned short *palette, GHQTexInfo *info)
{ {
boolean ret = 0; boolean ret = txfilter_hirestex(g64crc, r_crc64, palette, info);
if (txfilter.hirestex)
ret = (*txfilter.hirestex)(g64crc, r_crc64, palette, info);
return ret; return ret;
} }
uint64 ext_ghq_checksum(unsigned char *src, int width, int height, int size, int rowStride, unsigned char *palette) uint64 ext_ghq_checksum(unsigned char *src, int width, int height, int size, int rowStride, unsigned char *palette)
{ {
uint64 ret = 0; uint64 ret = txfilter_checksum(src, width, height, size, rowStride, palette);
if (txfilter.checksum)
ret = (*txfilter.checksum)(src, width, height, size, rowStride, palette);
return ret; return ret;
} }
boolean ext_ghq_dmptx(unsigned char *src, int width, int height, int rowStridePixel, unsigned short gfmt, unsigned short n64fmt, uint64 r_crc64) boolean ext_ghq_dmptx(unsigned char *src, int width, int height, int rowStridePixel, unsigned short gfmt, unsigned short n64fmt, uint64 r_crc64)
{ {
boolean ret = 0; boolean ret = txfilter_dmptx(src, width, height, rowStridePixel, gfmt, n64fmt, r_crc64);
if (txfilter.dmptx)
ret = (*txfilter.dmptx)(src, width, height, rowStridePixel, gfmt, n64fmt, r_crc64);
return ret; return ret;
} }
boolean ext_ghq_reloadhirestex() boolean ext_ghq_reloadhirestex()
{ {
boolean ret = 0; boolean ret = txfilter_reloadhirestex();
if (txfilter.reloadhirestex)
ret = (*txfilter.reloadhirestex)();
return ret; return ret;
} }

View File

@ -44,7 +44,7 @@
#define CHDIR(a) chdir(a) #define CHDIR(a) chdir(a)
#endif #endif
#ifdef __MSC__ #ifdef WIN32
typedef __int64 int64; typedef __int64 int64;
typedef unsigned __int64 uint64; typedef unsigned __int64 uint64;
typedef unsigned char boolean; typedef unsigned char boolean;

View File

@ -39,7 +39,6 @@
Name="VCCLCompilerTool" Name="VCCLCompilerTool"
AdditionalIncludeDirectories="&quot;$(Root)Source/3rd Party/wx/include&quot;;&quot;$(Root)Source/3rd Party/wx/include/msvc&quot;;inc" AdditionalIncludeDirectories="&quot;$(Root)Source/3rd Party/wx/include&quot;;&quot;$(Root)Source/3rd Party/wx/include/msvc&quot;;inc"
PreprocessorDefinitions="__MSC__;" PreprocessorDefinitions="__MSC__;"
RuntimeLibrary="3"
UsePrecompiledHeader="0" UsePrecompiledHeader="0"
/> />
<Tool <Tool
@ -102,7 +101,6 @@
Name="VCCLCompilerTool" Name="VCCLCompilerTool"
AdditionalIncludeDirectories="&quot;$(Root)Source/3rd Party/wx/include&quot;;&quot;$(Root)Source/3rd Party/wx/include/msvc&quot;;inc" AdditionalIncludeDirectories="&quot;$(Root)Source/3rd Party/wx/include&quot;;&quot;$(Root)Source/3rd Party/wx/include/msvc&quot;;inc"
PreprocessorDefinitions="__MSC__" PreprocessorDefinitions="__MSC__"
RuntimeLibrary="2"
UsePrecompiledHeader="0" UsePrecompiledHeader="0"
/> />
<Tool <Tool
@ -236,10 +234,6 @@
RelativePath="TexCache.h" RelativePath="TexCache.h"
> >
</File> </File>
<File
RelativePath=".\texConv.cpp"
>
</File>
<File <File
RelativePath="TexConv.h" RelativePath="TexConv.h"
> >
@ -248,34 +242,18 @@
RelativePath="TexLoad.h" RelativePath="TexLoad.h"
> >
</File> </File>
<File
RelativePath=".\TexLoad16b.cpp"
>
</File>
<File <File
RelativePath="TexLoad16b.h" RelativePath="TexLoad16b.h"
> >
</File> </File>
<File
RelativePath=".\TexLoad32b.cpp"
>
</File>
<File <File
RelativePath="TexLoad32b.h" RelativePath="TexLoad32b.h"
> >
</File> </File>
<File
RelativePath=".\TexLoad4b.cpp"
>
</File>
<File <File
RelativePath="TexLoad4b.h" RelativePath="TexLoad4b.h"
> >
</File> </File>
<File
RelativePath=".\TexLoad8b.cpp"
>
</File>
<File <File
RelativePath="TexLoad8b.h" RelativePath="TexLoad8b.h"
> >
@ -289,7 +267,7 @@
> >
</File> </File>
<File <File
RelativePath="Texture.asm" RelativePath=".\Texture.asm.cpp"
> >
</File> </File>
</Filter> </Filter>

View File

@ -149,48 +149,7 @@ void ClearCache ()
//**************************************************************** //****************************************************************
// GetTexInfo - gets information for either t0 or t1, checks if in cache & fills tex_found // GetTexInfo - gets information for either t0 or t1, checks if in cache & fills tex_found
/*
 * asmTextureCRC - 32-bit checksum over a rectangular region of memory.
 *
 * addr   : address of the first byte to read (loaded into edi)
 * width  : number of 8-byte groups summed per row
 * height : number of rows
 * line   : byte count added to the read pointer after each row
 *
 * For every 8-byte group the two dwords are added to the accumulator, the
 * accumulator is multiplied by the remaining-group counter, and the high
 * half of the 64-bit product is added back in.  The same multiply-and-fold
 * step is applied once per row with the remaining-row counter.
 *
 * There is no return statement: the result is the value left in eax when
 * the _asm block ends.  Both loops test their counter after the body, so
 * width and height must be at least 1.
 */
int asmTextureCRC(int addr, int width, int height, int line) extern "C" int asmTextureCRC(int addr, int width, int height, int line);
{
_asm {
push ebx
push edi
xor eax,eax ; eax is final result
mov ebx,[line]
mov ecx,[height] ; ecx is height counter
mov edi,[addr] ; edi is ptr to texture memory
crc_loop_y:
push ecx
mov ecx,[width]
crc_loop_x:
add eax,[edi] ; MUST be 64-bit aligned, so manually unroll
add eax,[edi+4]
// edx:eax = eax * remaining groups in this row; fold the high half back into eax
mov edx,ecx
mul edx
add eax,edx
add edi,8
dec ecx
jnz crc_loop_x
pop ecx
// same fold, this time with the remaining-row counter
mov edx,ecx
mul edx
add eax,edx
add edi,ebx
dec ecx
jnz crc_loop_y
pop edi
pop ebx
}
}
void GetTexInfo (int id, int tile) void GetTexInfo (int id, int tile)
{ {
FRDP (" | |-+ GetTexInfo (id: %d, tile: %d)\n", id, tile); FRDP (" | |-+ GetTexInfo (id: %d, tile: %d)\n", id, tile);
@ -1045,6 +1004,7 @@ void LoadTex (int id, int tmu)
// Get this cache object // Get this cache object
cache = voodoo.tex_UMA?&rdp.cache[0][rdp.n_cached[0]]:&rdp.cache[tmu][rdp.n_cached[tmu]]; cache = voodoo.tex_UMA?&rdp.cache[0][rdp.n_cached[0]]:&rdp.cache[tmu][rdp.n_cached[tmu]];
memset(cache,0,sizeof(*cache));
rdp.cur_cache[id] = cache; rdp.cur_cache[id] = cache;
rdp.cur_cache_n[id] = rdp.n_cached[tmu]; rdp.cur_cache_n[id] = rdp.n_cached[tmu];

View File

@ -1,303 +0,0 @@
#include "Gfx #1.3.h"
/*****************************************************************
16b textures load
*****************************************************************/
/*****************************************************************
; Size: 2, Format: 0
;
; 2008.03.29 cleaned up - H.Morii
; 2009 ported to NASM - Sergey (Gonetz) Lipski
*/
/*
 * asmLoad16bRGBA - copy 16-bit texels from src to dst, exchanging the two
 * bytes of every texel and then rotating the texel right by one bit.
 *
 * src    : source address
 * dst    : destination address
 * wid_64 : number of 8-byte (four-texel) groups per row
 * height : number of rows
 * line   : bytes added to the source position after each row
 * ext    : bytes added to the destination pointer after each row
 *
 * The rotate moves bit 0 of each texel to bit 15 -- presumably converting
 * RGBA5551 to ARGB1555; confirm against the caller.
 *
 * Rows are handled in pairs.  The first row of a pair is stored in source
 * order; in the second row the two dwords of every 8-byte group are
 * exchanged.  After each row the source offset relative to src is wrapped
 * with "and 0xFFF".  Both loops test their counter after the body, so
 * wid_64 and height must be at least 1.
 */
void asmLoad16bRGBA (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext)
{
_asm {
push ebx
push esi
push edi
mov esi,[src]
mov edi,[dst]
mov ecx,[height]
y_loop:
push ecx
mov ecx,[wid_64]
// first row of the pair: dwords kept in source order
x_loop:
mov eax,[esi] ; read both pixels
mov ebx,[esi+4] ; read both pixels
bswap eax
bswap ebx
// rotate the low texel, swap halves, rotate the other texel
ror ax,1
ror bx,1
ror eax,16
ror ebx,16
ror ax,1
ror bx,1
mov [edi],eax
mov [edi+4],ebx
add esi,8
add edi,8
dec ecx
jnz x_loop
pop ecx
dec ecx
jz end_y_loop
push ecx
// advance the source by line bytes, wrapping the offset from src to 12 bits
mov eax,esi
add eax,[line]
mov esi,[src]
sub eax, esi
and eax, 0xFFF
add esi, eax
add edi,[ext]
mov ecx,[wid_64]
// second row of the pair: the two dwords of each group are exchanged
x_loop_2:
mov eax,[esi+4] ; read both pixels
mov ebx,[esi] ; read both pixels
bswap eax
bswap ebx
ror ax,1
ror bx,1
ror eax,16
ror ebx,16
ror ax,1
ror bx,1
mov [edi],eax
mov [edi+4],ebx
add esi,8
add edi,8
dec ecx
jnz x_loop_2
mov eax,esi
add eax,[line]
mov esi,[src]
sub eax, esi
and eax, 0xFFF
add esi, eax
add edi,[ext]
pop ecx
dec ecx
jnz y_loop
end_y_loop:
pop edi
pop esi
pop ebx
}
}
/****************************************************************
; Size: 2, Format: 3
;
; ** by Gugaman/Dave2001 **
;
; 2008.03.29 cleaned up - H.Morii
; 2009 ported to NASM - Sergey (Gonetz) Lipski
*/
/*
 * asmLoad16bIA - copy rows of 8-byte groups from src to dst without
 * modifying the data.
 *
 * src    : source address
 * dst    : destination address
 * wid_64 : number of 8-byte groups per row
 * height : number of rows
 * line   : bytes added to the source pointer after each row
 * ext    : bytes added to the destination pointer after each row
 *
 * Rows are handled in pairs: the first row of a pair is copied as is, the
 * second with the two dwords of every 8-byte group exchanged.  Unlike
 * asmLoad16bRGBA the source pointer is not wrapped.  Both loops test their
 * counter after the body, so wid_64 and height must be at least 1.
 */
void asmLoad16bIA (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext)
{
_asm {
push ebx
push esi
push edi
mov esi,[src]
mov edi,[dst]
mov ecx,[height]
y_loop:
push ecx
mov ecx,[wid_64]
// first row of the pair: straight copy
x_loop:
mov eax,[esi] ; read both pixels
mov ebx,[esi+4] ; read both pixels
mov [edi],eax
mov [edi+4],ebx
add esi,8
add edi,8
dec ecx
jnz x_loop
pop ecx
dec ecx
jz end_y_loop
push ecx
add esi,[line]
add edi,[ext]
mov ecx,[wid_64]
// second row of the pair: dwords of each group exchanged
x_loop_2:
mov eax,[esi+4] ; read both pixels
mov ebx,[esi] ; read both pixels
mov [edi],eax
mov [edi+4],ebx
add esi,8
add edi,8
dec ecx
jnz x_loop_2
add esi,[line]
add edi,[ext]
pop ecx
dec ecx
jnz y_loop
end_y_loop:
pop edi
pop esi
pop ebx
}
}
/*****************************************************************
;16b textures mirror/clamp/wrap
;*****************************************************************/
/*
 * asmMirror16bS - write count 16-bit texels per row at start, each fetched
 * from the row at tex either forwards or mirrored.
 *
 * tex    : address of the source row (advanced by full after every row)
 * start  : address of the first texel to write
 * width  : tested as a bit mask against (width + x); a non-zero result
 *          selects the mirrored fetch (presumably a power of two - confirm)
 * height : number of rows
 * mask   : byte-offset mask applied to x*2
 * line   : bytes added to the write pointer after the count texels of a row
 * full   : bytes added to tex after each row
 * count  : texels written per row
 *
 * For x = 0 .. count-1:
 *   forward : source = tex + ((x*2) & mask)
 *   mirrored: source = tex + mask - ((x*2) & mask)
 * The x loop ends on equality with count and the row loop tests after the
 * body, so count and height must be at least 1.  The tex argument is
 * overwritten in place as rows advance.
 */
void asmMirror16bS (int tex, int start, int width, int height, int mask, int line, int full, int count)
{
_asm {
push ebx
push esi
push edi
mov edi,[start]
mov ecx,[height]
loop_y:
xor edx,edx
loop_x:
mov esi,[tex]
// (width + x) & width decides between forward and mirrored fetch
mov ebx,[width]
add ebx,edx
and ebx,[width]
jnz is_mirrored
mov eax,edx
shl eax,1
and eax,[mask]
add esi,eax
mov ax,[esi]
mov [edi],ax
add edi,2
jmp end_mirror_check
is_mirrored:
add esi,[mask]
mov eax,edx
shl eax,1
and eax,[mask]
sub esi,eax
mov ax,[esi]
mov [edi],ax
add edi,2
end_mirror_check:
inc edx
cmp edx,[count]
jne loop_x
add edi,[line]
// step the source row; the parameter slot itself is updated
mov eax,[tex]
add eax,[full]
mov [tex],eax
dec ecx
jnz loop_y
pop edi
pop esi
pop ebx
}
}
/*
 * asmWrap16bS - write count dwords per row at start, repeating the
 * beginning of the row at tex.
 *
 * tex    : address of the source row (advanced by full after every row)
 * start  : address of the first dword to write
 * height : number of rows
 * mask   : mask applied to the dword index x before it is scaled by 4
 * line   : bytes added to the write pointer after the count dwords of a row
 * full   : bytes added to tex after each row
 * count  : dwords written per row
 *
 * For x = 0 .. count-1 the dword at tex + ((x & mask) * 4) is copied.  The
 * unit of work is a 32-bit dword (two 16-bit texels), so mask and count are
 * expressed in dwords.  count and height must be at least 1.  The tex
 * argument is overwritten in place as rows advance.
 */
void asmWrap16bS (int tex, int start, int height, int mask, int line, int full, int count)
{
_asm {
push ebx
push esi
push edi
mov edi,[start]
mov ecx,[height]
loop_y:
xor edx,edx
loop_x:
mov esi,[tex]
mov eax,edx
and eax,[mask]
shl eax,2
add esi,eax
mov eax,[esi]
mov [edi],eax
add edi,4
inc edx
cmp edx,[count]
jne loop_x
add edi,[line]
mov eax,[tex]
add eax,[full]
mov [tex],eax
dec ecx
jnz loop_y
pop edi
pop esi
pop ebx
}
}
/*
 * asmClamp16bS - fill count 16-bit texels per row with a per-row constant.
 *
 * tex      : address of the first texel to write
 * constant : address of the 16-bit value used for the first row
 * height   : number of rows
 * line     : bytes added to the write pointer after the count texels of a row
 * full     : bytes added to the constant pointer after each row
 * count    : texels written per row
 *
 * Both loops test their counter after the body, so count and height must be
 * at least 1.  ebx is saved and restored but not otherwise used.
 */
void asmClamp16bS (int tex, int constant, int height,int line, int full, int count)
{
_asm {
push ebx
push esi
push edi
mov esi,[constant]
mov edi,[tex]
mov ecx,[height]
y_loop:
// fetch the fill value for this row
mov ax,[esi]
mov edx,[count]
x_loop:
mov [edi],ax ; don't unroll or make dword, it may go into next line (doesn't have to be multiple of two)
add edi,2
dec edx
jnz x_loop
add esi,[full]
add edi,[line]
dec ecx
jnz y_loop
pop edi
pop esi
pop ebx
}
}

View File

@ -1,139 +0,0 @@
#include "Gfx #1.3.h"
/*****************************************************************
;32b textures mirror/clamp/wrap
;*****************************************************************/
/*
 * asmMirror32bS - write count 32-bit texels per row at start, each fetched
 * from the row at tex either forwards or mirrored.
 *
 * tex    : address of the source row (advanced by full after every row)
 * start  : address of the first texel to write
 * width  : tested as a bit mask against (width + x); a non-zero result
 *          selects the mirrored fetch (presumably a power of two - confirm)
 * height : number of rows
 * mask   : byte-offset mask applied to x*4
 * line   : bytes added to the write pointer after the count texels of a row
 * full   : bytes added to tex after each row
 * count  : texels written per row
 *
 * For x = 0 .. count-1:
 *   forward : source = tex + ((x*4) & mask)
 *   mirrored: source = tex + mask - ((x*4) & mask)
 * count and height must be at least 1.  The tex argument is overwritten in
 * place as rows advance.
 */
void asmMirror32bS (int tex, int start, int width, int height, int mask, int line, int full, int count)
{
_asm {
push ebx
push esi
push edi
mov edi,[start]
mov ecx,[height]
loop_y:
xor edx,edx
loop_x:
mov esi,[tex]
// (width + x) & width decides between forward and mirrored fetch
mov ebx,[width]
add ebx,edx
and ebx,[width]
jnz is_mirrored
mov eax,edx
shl eax,2
and eax,[mask]
add esi,eax
mov eax,[esi]
mov [edi],eax
add edi,4
jmp end_mirror_check
is_mirrored:
add esi,[mask]
mov eax,edx
shl eax,2
and eax,[mask]
sub esi,eax
mov eax,[esi]
mov [edi],eax
add edi,4
end_mirror_check:
inc edx
cmp edx,[count]
jne loop_x
add edi,[line]
// step the source row; the parameter slot itself is updated
mov eax,[tex]
add eax,[full]
mov [tex],eax
dec ecx
jnz loop_y
pop edi
pop esi
pop ebx
}
}
/*
 * asmWrap32bS - write count 32-bit texels per row at start, repeating the
 * beginning of the row at tex.
 *
 * tex    : address of the source row (advanced by full after every row)
 * start  : address of the first texel to write
 * height : number of rows
 * mask   : mask applied to the texel index x before it is scaled by 4
 * line   : bytes added to the write pointer after the count texels of a row
 * full   : bytes added to tex after each row
 * count  : texels written per row
 *
 * For x = 0 .. count-1 the dword at tex + ((x & mask) * 4) is copied.
 * count and height must be at least 1.  The tex argument is overwritten in
 * place as rows advance.
 */
void asmWrap32bS (int tex, int start, int height, int mask, int line, int full, int count)
{
_asm {
push ebx
push esi
push edi
mov edi,[start]
mov ecx,[height]
loop_y:
xor edx,edx
loop_x:
mov esi,[tex]
mov eax,edx
and eax,[mask]
shl eax,2
add esi,eax
mov eax,[esi]
mov [edi],eax
add edi,4
inc edx
cmp edx,[count]
jne loop_x
add edi,[line]
mov eax,[tex]
add eax,[full]
mov [tex],eax
dec ecx
jnz loop_y
pop edi
pop esi
pop ebx
}
}
/*
 * asmClamp32bS - fill count 32-bit texels per row with a per-row constant.
 *
 * tex      : address of the first texel to write
 * constant : address of the 32-bit value used for the first row
 * height   : number of rows
 * line     : bytes added to the write pointer after the count texels of a row
 * full     : bytes added to the constant pointer after each row
 * count    : texels written per row
 *
 * Both loops test their counter after the body, so count and height must be
 * at least 1.  ebx is saved and restored but not otherwise used.
 */
void asmClamp32bS (int tex, int constant, int height,int line, int full, int count)
{
_asm {
push ebx
push esi
push edi
mov esi,[constant]
mov edi,[tex]
mov ecx,[height]
y_loop:
// fetch the fill value for this row
mov eax,[esi]
mov edx,[count]
x_loop:
// NOTE(review): the trailing remark on the store below has the same wording as
// the 16-bit variant; here the store is already one dword per texel.
mov [edi],eax ; don't unroll or make dword, it may go into next line (doesn't have to be multiple of two)
add edi,4
dec edx
jnz x_loop
add esi,[full]
add edi,[line]
dec ecx
jnz y_loop
pop edi
pop esi
pop ebx
}
}

File diff suppressed because it is too large Load Diff

View File

@ -1,771 +0,0 @@
#include "Gfx #1.3.h"
/*****************************************************************
;8b textures load
;****************************************************************/
/*****************************************************************
; Size: 1, Format: 2
;
; 2008.03.29 cleaned up - H.Morii
; 2009 ported to NASM - Sergey (Gonetz) Lipski
*/
/*
 * asmLoad8bCI - expand 8-bit indices to 16-bit texels through a palette.
 *
 * src    : source address (one index per byte)
 * dst    : destination address (one 16-bit texel per index)
 * wid_64 : number of 8-byte groups read per row (16 bytes written per group)
 * height : number of rows
 * line   : bytes added to the source position after each row
 * ext    : bytes added to the destination pointer after each row
 * pal    : address of a table of 16-bit entries, indexed by the source byte
 *
 * Every source byte b is replaced by the entry at pal + b*2, rotated right
 * by one bit.  Within a dword the texels are written in the same order as
 * the source bytes appear in memory.
 *
 * Rows are handled in pairs.  The first row of a pair reads the dwords in
 * order; the second row reads the dword at +4 before the dword at +0 of
 * each 8-byte group.  After each row, and after each group in the second
 * row, the source offset relative to src is wrapped with "and 0x7FF".
 * Both loops test their counter after the body, so wid_64 and height must
 * be at least 1.
 */
void asmLoad8bCI (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext, wxUIntPtr pal)
{
_asm {
push ebx
push esi
push edi
mov ebx,[pal]
mov esi,[src]
mov edi,[dst]
mov ecx,[height]
y_loop:
push ecx
mov ecx,[wid_64]
x_loop:
// cx is used as the palette scratch register, so the group counter is parked on the stack
push ecx
mov eax,[esi] ; read all 4 pixels
bswap eax
add esi,4
mov edx,eax
; 1st dword output {
// each shift/and pair turns one index byte into a byte offset (index * 2) into pal
shr eax,15
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
mov eax,edx
shr eax,23
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
mov [edi],ecx
add edi,4
; }
; 2nd dword output {
mov eax,edx
shl eax,1
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
shr edx,7
and edx,0x1FE
mov cx,[ebx+edx]
ror cx,1
mov [edi],ecx
add edi,4
; }
; * copy
mov eax,[esi] ; read all 4 pixels
bswap eax
add esi,4
mov edx,eax
; 1st dword output {
shr eax,15
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
mov eax,edx
shr eax,23
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
mov [edi],ecx
add edi,4
; }
; 2nd dword output {
mov eax,edx
shl eax,1
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
shr edx,7
and edx,0x1FE
mov cx,[ebx+edx]
ror cx,1
mov [edi],ecx
add edi,4
; }
; *
pop ecx
dec ecx
jnz x_loop
pop ecx
dec ecx
jz near end_y_loop
push ecx
// advance the source by line bytes, wrapping the offset from src to 11 bits
mov eax,esi
add eax,[line]
mov esi,[src]
sub eax,esi
and eax,0x7FF
add esi,eax
add edi,[ext]
mov ecx,[wid_64]
// second row of the pair: dword at +4 is converted before the dword at +0
x_loop_2:
push ecx
mov eax,[esi+4] ; read all 4 pixels
bswap eax
mov edx,eax
; 1st dword output {
shr eax,15
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
mov eax,edx
shr eax,23
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
mov [edi],ecx
add edi,4
; }
; 2nd dword output {
mov eax,edx
shl eax,1
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
shr edx,7
and edx,0x1FE
mov cx,[ebx+edx]
ror cx,1
mov [edi],ecx
add edi,4
; }
; * copy
mov eax,[esi] ; read all 4 pixels
bswap eax
// step the source past this 8-byte group, again wrapping the offset from src
mov edx,esi
add edx,8
mov esi,[src]
sub edx,esi
and edx,0x7FF
add esi,edx
mov edx,eax
; 1st dword output {
shr eax,15
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
mov eax,edx
shr eax,23
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
mov [edi],ecx
add edi,4
; }
; 2nd dword output {
mov eax,edx
shl eax,1
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
shr edx,7
and edx,0x1FE
mov cx,[ebx+edx]
ror cx,1
mov [edi],ecx
add edi,4
; }
; *
pop ecx
dec ecx
jnz x_loop_2
mov eax,esi
add eax,[line]
mov esi,[src]
sub eax,esi
and eax,0x7FF
add esi,eax
add edi,[ext]
pop ecx
dec ecx
jnz y_loop
end_y_loop:
pop edi
pop esi
pop ebx
}
}
/*
 * asmLoad8bIA8 - expand 8-bit indices to 16-bit texels through a palette,
 * exchanging the two bytes of every palette entry.
 *
 * src    : source address (one index per byte)
 * dst    : destination address (one 16-bit texel per index)
 * wid_64 : number of 8-byte groups read per row (16 bytes written per group)
 * height : number of rows
 * line   : bytes added to the source pointer after each row
 * ext    : bytes added to the destination pointer after each row
 * pal    : address of a table of 16-bit entries, indexed by the source byte
 *
 * Every source byte b is replaced by the entry at pal + b*2 rotated by
 * eight bits (its two bytes exchanged).  Within a dword the texels are
 * written in the same order as the source bytes appear in memory.
 *
 * Rows are handled in pairs.  The first row of a pair reads the dwords in
 * order; the second row reads the dword at +4 before the dword at +0 of
 * each 8-byte group.  The source pointer is not wrapped.  Both loops test
 * their counter after the body, so wid_64 and height must be at least 1.
 */
void asmLoad8bIA8 (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext, wxUIntPtr pal)
{
_asm {
push ebx
push esi
push edi
mov ebx,[pal]
mov esi,[src]
mov edi,[dst]
mov ecx,[height]
y_loop:
push ecx
mov ecx,[wid_64]
x_loop:
// cx is used as the palette scratch register, so the group counter is parked on the stack
push ecx
mov eax,[esi] ; read all 4 pixels
bswap eax
add esi,4
mov edx,eax
; 1st dword output {
// each shift/and pair turns one index byte into a byte offset (index * 2) into pal
shr eax,15
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
mov eax,edx
shr eax,23
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
mov [edi],ecx
add edi,4
; }
; 2nd dword output {
mov eax,edx
shl eax,1
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
shr edx,7
and edx,0x1FE
mov cx,[ebx+edx]
ror cx,8
mov [edi],ecx
add edi,4
; }
; * copy
mov eax,[esi] ; read all 4 pixels
bswap eax
add esi,4
mov edx,eax
; 1st dword output {
shr eax,15
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
mov eax,edx
shr eax,23
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
mov [edi],ecx
add edi,4
; }
; 2nd dword output {
mov eax,edx
shl eax,1
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
shr edx,7
and edx,0x1FE
mov cx,[ebx+edx]
ror cx,8
mov [edi],ecx
add edi,4
; }
; *
pop ecx
dec ecx
jnz x_loop
pop ecx
dec ecx
jz near end_y_loop
push ecx
add esi,[line]
add edi,[ext]
mov ecx,[wid_64]
// second row of the pair: dword at +4 is converted before the dword at +0
x_loop_2:
push ecx
mov eax,[esi+4] ; read all 4 pixels
bswap eax
mov edx,eax
; 1st dword output {
shr eax,15
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
mov eax,edx
shr eax,23
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
mov [edi],ecx
add edi,4
; }
; 2nd dword output {
mov eax,edx
shl eax,1
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
shr edx,7
and edx,0x1FE
mov cx,[ebx+edx]
ror cx,8
mov [edi],ecx
add edi,4
; }
; * copy
mov eax,[esi] ; read all 4 pixels
bswap eax
add esi,8
mov edx,eax
; 1st dword output {
shr eax,15
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
mov eax,edx
shr eax,23
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
mov [edi],ecx
add edi,4
; }
; 2nd dword output {
mov eax,edx
shl eax,1
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
shr edx,7
and edx,0x1FE
mov cx,[ebx+edx]
ror cx,8
mov [edi],ecx
add edi,4
; }
; *
pop ecx
dec ecx
jnz x_loop_2
add esi,[line]
add edi,[ext]
pop ecx
dec ecx
jnz y_loop
end_y_loop:
pop edi
pop esi
pop ebx
}
}
/*****************************************************************
; Size: 1, Format: 3
;
; ** by Gugaman **
;
; 2008.03.29 cleaned up - H.Morii
; 2009 ported to NASM - Sergey (Gonetz) Lipski
*/
/*
 * asmLoad8bIA4 - copy rows of bytes from src to dst, exchanging the high
 * and low nibble of every byte.
 *
 * src    : source address
 * dst    : destination address
 * wid_64 : number of 8-byte groups per row
 * height : number of rows
 * line   : bytes added to the source pointer after each row
 * ext    : bytes added to the destination pointer after each row
 *
 * Four bytes are processed at a time:
 *   out = ((in >> 4) & 0x0F0F0F0F) | ((in << 4) & 0xF0F0F0F0)
 *
 * Rows are handled in pairs: the first row of a pair keeps the dword
 * order, the second exchanges the two dwords of every 8-byte group.  Both
 * loops test their counter after the body, so wid_64 and height must be at
 * least 1.  ebx is saved and restored but not otherwise used.
 */
void asmLoad8bIA4 (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext)
{
_asm {
push ebx
push esi
push edi
mov esi,[src]
mov edi,[dst]
mov ecx,[height]
y_loop:
push ecx
mov ecx,[wid_64]
x_loop:
mov eax,[esi] ; read all 4 pixels
mov edx,eax
shr eax,4 ;all alpha
shl edx,4
and eax,0x0F0F0F0F
and edx,0xF0F0F0F0
add esi,4
or eax,edx
mov [edi],eax ; save dword
add edi,4
mov eax,[esi] ; read all 4 pixels
mov edx,eax
shr eax,4 ;all alpha
shl edx,4
and eax,0x0F0F0F0F
and edx,0xF0F0F0F0
add esi,4
or eax,edx
mov [edi],eax ; save dword
add edi,4
; *
dec ecx
jnz x_loop
pop ecx
dec ecx
jz end_y_loop
push ecx
add esi,[line]
add edi,[ext]
mov ecx,[wid_64]
// second row of the pair: dword at +4 is converted before the dword at +0
x_loop_2:
mov eax,[esi+4] ; read both pixels
mov edx,eax
shr eax,4 ;all alpha
shl edx,4
and eax,0x0F0F0F0F
and edx,0xF0F0F0F0
or eax,edx
mov [edi],eax ;save dword
add edi,4
mov eax,[esi] ; read both pixels
add esi,8
mov edx,eax
shr eax,4 ;all alpha
shl edx,4
and eax,0x0F0F0F0F
and edx,0xF0F0F0F0
or eax,edx
mov [edi],eax ;save dword
add edi,4
; *
dec ecx
jnz x_loop_2
add esi,[line]
add edi,[ext]
pop ecx
dec ecx
jnz y_loop
end_y_loop:
pop edi
pop esi
pop ebx
}
}
/*****************************************************************
; Size: 1, Format: 4
;
; ** by Gugaman **
; 2009 ported to NASM - Sergey (Gonetz) Lipski
*/
/*
 * asmLoad8bI - copy rows of 8-byte groups from src to dst without modifying
 * the data.
 *
 * src    : source address
 * dst    : destination address
 * wid_64 : number of 8-byte groups per row
 * height : number of rows
 * line   : bytes added to the source pointer after each row
 * ext    : bytes added to the destination pointer after each row
 *
 * Rows are handled in pairs: the first row of a pair is copied as is, the
 * second with the two dwords of every 8-byte group exchanged.  Both loops
 * test their counter after the body, so wid_64 and height must be at
 * least 1.  ebx is saved and restored but not otherwise used.
 *
 * NOTE(review): dst is declared int and wid_64 wxUIntPtr, the reverse of
 * the other asmLoad8b* loaders in this file; the body only reads them as
 * 32-bit stack slots.  Confirm against the declaration before changing.
 */
void asmLoad8bI (wxUIntPtr src, int dst, wxUIntPtr wid_64, int height, int line, int ext)
{
_asm {
push ebx
push esi
push edi
mov esi,[src]
mov edi,[dst]
mov ecx,[height]
y_loop:
push ecx
mov ecx,[wid_64]
x_loop:
mov eax,[esi] ; read all 4 pixels
add esi,4
mov [edi],eax ; save dword
add edi,4
mov eax,[esi] ; read all 4 pixels
add esi,4
mov [edi],eax ; save dword
add edi,4
; *
dec ecx
jnz x_loop
pop ecx
dec ecx
jz end_y_loop
push ecx
add esi,[line]
add edi,[ext]
mov ecx,[wid_64]
// second row of the pair: dword at +4 is copied before the dword at +0
x_loop_2:
mov eax,[esi+4] ; read both pixels
mov [edi],eax ;save dword
add edi,4
mov eax,[esi] ; read both pixels
add esi,8
mov [edi],eax ;save dword
add edi,4
; *
dec ecx
jnz x_loop_2
add esi,[line]
add edi,[ext]
pop ecx
dec ecx
jnz y_loop
end_y_loop:
pop edi
pop esi
pop ebx
}
}
/*****************************************************************
;
; ******** Textures mirror/clamp/wrap ********
;
;*****************************************************************/
/*****************************************************************
;8b textures mirror/clamp/wrap
;*****************************************************************/
/*
 * asmMirror8bS - write count 8-bit texels per row at start, each fetched
 * from the row at tex either forwards or mirrored.
 *
 * tex    : address of the source row (advanced by full after every row)
 * start  : address of the first texel to write
 * width  : tested as a bit mask against (width + x); a non-zero result
 *          selects the mirrored fetch (presumably a power of two - confirm)
 * height : number of rows
 * mask   : mask applied to x to form the byte offset
 * line   : bytes added to the write pointer after the count texels of a row
 * full   : bytes added to tex after each row
 * count  : texels written per row
 *
 * For x = 0 .. count-1:
 *   forward : source = tex + (x & mask)
 *   mirrored: source = tex + mask - (x & mask)
 * count and height must be at least 1.  The tex argument is overwritten in
 * place as rows advance.
 */
void asmMirror8bS (int tex, int start, int width, int height, int mask, int line, int full, int count)
{
_asm{
push ebx
push esi
push edi
mov edi,[start]
mov ecx,[height]
loop_y:
xor edx,edx
loop_x:
mov esi,[tex]
// (width + x) & width decides between forward and mirrored fetch
mov ebx,[width]
add ebx,edx
and ebx,[width]
jnz is_mirrored
mov eax,edx
and eax,[mask]
add esi,eax
mov al,[esi]
mov [edi],al
inc edi
jmp end_mirror_check
is_mirrored:
add esi,[mask]
mov eax,edx
and eax,[mask]
sub esi,eax
mov al,[esi]
mov [edi],al
inc edi
end_mirror_check:
inc edx
cmp edx,[count]
jne loop_x
add edi,[line]
// step the source row; the parameter slot itself is updated
mov eax,[tex]
add eax,[full]
mov [tex],eax
dec ecx
jnz loop_y
pop edi
pop esi
pop ebx
}
}
/*
 * asmWrap8bS - write count dwords per row at start, repeating the beginning
 * of the row at tex.
 *
 * tex    : address of the source row (advanced by full after every row)
 * start  : address of the first dword to write
 * height : number of rows
 * mask   : mask applied to the dword index x before it is scaled by 4
 * line   : bytes added to the write pointer after the count dwords of a row
 * full   : bytes added to tex after each row
 * count  : dwords written per row
 *
 * For x = 0 .. count-1 the dword at tex + ((x & mask) * 4) is copied.  The
 * unit of work is a 32-bit dword (four 8-bit texels), so mask and count are
 * expressed in dwords.  count and height must be at least 1.  The tex
 * argument is overwritten in place as rows advance.
 */
void asmWrap8bS (int tex, int start, int height, int mask, int line, int full, int count)
{
_asm {
push ebx
push esi
push edi
mov edi,[start]
mov ecx,[height]
loop_y:
xor edx,edx
loop_x:
mov esi,[tex]
mov eax,edx
and eax,[mask]
shl eax,2
add esi,eax
mov eax,[esi]
mov [edi],eax
add edi,4
inc edx
cmp edx,[count]
jne loop_x
add edi,[line]
mov eax,[tex]
add eax,[full]
mov [tex],eax
dec ecx
jnz loop_y
pop edi
pop esi
pop ebx
}
}
/*
 * asmClamp8bS - fill count 8-bit texels per row with a per-row constant.
 *
 * tex      : address of the first texel to write
 * constant : address of the byte value used for the first row
 * height   : number of rows
 * line     : bytes added to the write pointer after the count texels of a row
 * full     : bytes added to the constant pointer after each row
 * count    : texels written per row
 *
 * Both loops test their counter after the body, so count and height must be
 * at least 1.  ebx is saved and restored but not otherwise used.
 */
void asmClamp8bS (int tex, int constant, int height,int line, int full, int count)
{
_asm {
push ebx
push esi
push edi
mov esi,[constant]
mov edi,[tex]
mov ecx,[height]
y_loop:
// fetch the fill value for this row
mov al,[esi]
mov edx,[count]
x_loop:
mov [edi],al ; don't unroll or make dword, it may go into next line (doesn't have to be multiple of two)
inc edi
dec edx
jnz x_loop
add esi,[full]
add edi,[line]
dec ecx
jnz y_loop
pop edi
pop esi
pop ebx
}
}

View File

@ -1,59 +0,0 @@
;/*
;* Glide64 - Glide video plugin for Nintendo 64 emulators.
;*
;* This program is free software; you can redistribute it and/or modify
;* it under the terms of the GNU General Public License as published by
;* the Free Software Foundation; either version 2 of the License, or
;* any later version.
;*
;* This program is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;* GNU General Public License for more details.
;*
;* You should have received a copy of the GNU General Public License
;* along with this program; if not, write to the Free Software
;* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
;*/
;
;****************************************************************
;
; Glide64 - Glide Plugin for Nintendo 64 emulators
; Project started on December 29th, 2001
;
; Authors:
; Dave2001, original author, founded the project in 2001, left it in 2002
; Gugaman, joined the project in 2002, left it in 2002
; Sergey 'Gonetz' Lipski, joined the project in 2002, main author since fall of 2002
; Hiroshi 'KoolSmoky' Morii, joined the project in 2007
;
;****************************************************************
;
; To modify Glide64:
; * Write your name and (optional)email, commented by your work, so I know who did it, and so that you can find which parts you modified when it comes time to send it to me.
; * Do NOT send me the whole project or file that you modified. Take out your modified code sections, and tell me where to put them. If people sent the whole thing, I would have many different versions, but no idea how to combine them all.
;
;****************************************************************
%include "inc/c32.mac"
segment .text
;****************************************************************
;
; ******** Load block/tile ********
;
;****************************************************************
; asmLoadBlock - procedure frame that declares six stack arguments
; (src, dst, off, dxt, cnt, swp).  No instructions appear between the
; argument declarations and endproc.
; proc/arg/endproc are presumably the macros from inc/c32.mac included above.
proc asmLoadBlock
; limit the assembler to the Pentium (586) instruction set from here on
CPU 586
%$src arg
%$dst arg
%$off arg
%$dxt arg
%$cnt arg
%$swp arg
endproc ;asmLoadBlock

File diff suppressed because it is too large Load Diff

View File

@ -48,33 +48,8 @@
#include "FBtoScreen.h" #include "FBtoScreen.h"
#include "CRC.h" #include "CRC.h"
/***************************************************************** extern "C" void SwapBlock32 ();
; SwapBlock - swaps every other 32-bit word at addr extern "C" void SwapBlock64 ();
;
; ecx = num_words -> 0
; edi = addr -> end of dest
;*****************************************************************/
/*
 * SwapBlock32 - exchange the two dwords of every 8-byte group in place.
 *
 * Register-argument helper, called from inline assembly only (naked: no
 * prologue or epilogue is generated, the body supplies its own ret).
 *
 * in : ecx = number of 8-byte groups, edi = address of the first group
 * out: ecx = 0, edi = address just past the last group processed
 *
 * eax and ebx are saved and restored.  A count of zero is accepted and
 * leaves memory and edi untouched.  Callers that still need the count must
 * save ecx themselves.
 */
__declspec(naked) void SwapBlock32 ()
{
_asm {
push eax
push ebx
// nothing to do for a zero count
or ecx,ecx
jz swapblock32_end
swapblock32_loop:
mov eax,[edi]
mov ebx,[edi+4]
mov [edi],ebx
mov [edi+4],eax
add edi,8
dec ecx
jnz swapblock32_loop
swapblock32_end:
pop ebx
pop eax
ret
}
}
const int NumOfFormats = 3; const int NumOfFormats = 3;
SCREEN_SHOT_FORMAT ScreenShotFormats[NumOfFormats] = { {wxT("BMP"), wxT("bmp"), wxBITMAP_TYPE_BMP}, {wxT("PNG"), wxT("png"), wxBITMAP_TYPE_PNG}, {wxT("JPEG"), wxT("jpeg"), wxBITMAP_TYPE_JPEG} }; SCREEN_SHOT_FORMAT ScreenShotFormats[NumOfFormats] = { {wxT("BMP"), wxT("bmp"), wxBITMAP_TYPE_BMP}, {wxT("PNG"), wxT("png"), wxBITMAP_TYPE_PNG}, {wxT("JPEG"), wxT("jpeg"), wxBITMAP_TYPE_JPEG} };
@ -1850,165 +1825,7 @@ void setTBufTex(wxUint16 t_mem, wxUint32 cnt)
} }
} }
/***************************************************************** extern "C" void asmLoadBlock(int src, int dst, int off, int dxt, int cnt, int swp);
; CopyBlock - copies a block from base_addr+offset to dest_addr, while unswapping the
; data within.
;
; edi = dest_addr -> end of dest
; ecx = num_words
; esi = base_addr (preserved)
; edx = offset (preserved)
;*****************************************************************/
/*
 * CopyBlock - copy ecx 8-byte words from esi+edx to edi, reversing the byte
 * order inside every source dword.
 *
 * Register-argument helper, called from inline assembly only (naked: no
 * prologue or epilogue is generated, the body supplies its own ret).
 *
 * in : edi = destination, ecx = number of 8-byte words,
 *      esi = base address, edx = byte offset from the base
 * out: edi = destination + 8 * count
 *      eax, ebx, esi and edx are restored to their entry values
 *      ecx is NOT preserved: it holds the count on exit when the offset is
 *      a multiple of four and count * 8 otherwise
 *
 * When the offset is not a multiple of four the copy runs in three stages:
 *   1. the tail bytes of the first, partially used source dword,
 *   2. whole dwords (one single dword, then pairs),
 *   3. the leading bytes of the dword that follows the aligned run.
 * A count of zero returns immediately without touching memory.
 */
__declspec(naked) void CopyBlock ( void )
{
_asm {
push eax
push ebx
push esi
push edx
or ecx,ecx
jz near copyblock_end
push ecx
; first, set the source address and check if not on a dword boundary
push esi
push edx
mov ebx,edx
and edx,0FFFFFFFCh
add esi,edx
and ebx,3 ; ebx = # we DON'T need to copy
jz copyblock_copy
// NOTE: the number of bytes still to copy from this dword is kept in edx
// (the trailing remark on the next line names ecx)
mov edx,4 ; ecx = # we DO need to copy
sub edx,ebx
; load the first word, accounting for swapping
mov eax,[esi]
add esi,4
// rotate the bytes that lie before the offset out of the way
copyblock_precopy_skip:
rol eax,8
dec ebx
jnz copyblock_precopy_skip
// emit the remaining bytes of the dword, most significant first
copyblock_precopy_copy:
rol eax,8
mov [edi],al
inc edi
dec edx
jnz copyblock_precopy_copy
mov eax,[esi]
add esi,4
bswap eax
mov [edi],eax
add edi,4
dec ecx ; 1 less word to copy
jz copyblock_postcopy
// main loop: two byte-reversed dwords per iteration
copyblock_copy:
mov eax,[esi]
bswap eax
mov [edi],eax
mov eax,[esi+4]
bswap eax
mov [edi+4],eax
add esi,8
add edi,8
dec ecx
jnz copyblock_copy
copyblock_postcopy:
pop edx
pop esi
pop ecx
; check again if on dword boundary
mov ebx,edx ; ebx = # we DO need to copy
and ebx,3
jz copyblock_end
// source of the trailing bytes: base + ((offset + count * 8) rounded down to a dword)
shl ecx,3 ; ecx = num_words * 8
add edx,ecx
and edx,0FFFFFFFCh
add esi,edx
mov eax,[esi]
copyblock_postcopy_copy:
rol eax,8
mov [edi],al
inc edi
dec ebx
jnz copyblock_postcopy_copy
copyblock_end:
pop edx
pop esi
pop ebx
pop eax
ret
}
}
/*
 * asmLoadBlock - copy cnt 8-byte words with CopyBlock, then walk the copied
 * data and hand selected runs of words to the swap routine swp.
 *
 * src : base address of the source
 * dst : destination address
 * off : byte offset added to src
 * dxt : increment added to a running accumulator once per 8-byte word
 * cnt : number of 8-byte words
 * swp : address of the swap routine; it is called with ecx = number of
 *       words and edi = address, and is expected to return with ecx = 0
 *       (presumably also with edi advanced past the run, as SwapBlock32
 *       does - confirm for other routines passed here)
 *
 * Walk: while the sign bit of the accumulator is clear, words are skipped
 * (edi advances by 8 each); while it is set, words are counted in ecx.
 * When the sign bit clears again the counted run is passed to swp.  A final
 * call to swp handles whatever is counted when cnt runs out.
 *
 * The word counter is tested after it is decremented, so cnt must be at
 * least 1.
 */
void asmLoadBlock(int src, int dst, int off, int dxt, int cnt, wxUIntPtr swp)
{
_asm {
push ebx
push esi
push edi
; copy the data
mov esi,[src]
mov edi,[dst]
mov ecx,[cnt]
mov edx,[off]
call CopyBlock
; now swap it
mov eax,[cnt] ; eax = count remaining
xor edx,edx ; edx = dxt counter
mov edi,[dst]
mov ebx,[dxt]
xor ecx,ecx ; ecx = how much to copy
// skip words while the accumulator is non-negative
dxt_test:
add edi,8
dec eax
jz end_dxt_test
add edx,ebx
jns dxt_test
// count words while the accumulator is negative
dxt_s_test:
inc ecx
dec eax
jz end_dxt_test
add edx,ebx
js dxt_s_test
; swap this data (ecx set, dst set)
call [swp] ; (ecx reset to 0 after)
jmp dxt_test ; and repeat
end_dxt_test:
; swap any remaining data
call [swp]
pop edi
pop esi
pop ebx
}
}
void LoadBlock32b(wxUint32 tile, wxUint32 ul_s, wxUint32 ul_t, wxUint32 lr_s, wxUint32 dxt); void LoadBlock32b(wxUint32 tile, wxUint32 ul_s, wxUint32 ul_t, wxUint32 lr_s, wxUint32 dxt);
static void rdp_loadblock() static void rdp_loadblock()
{ {
@ -2100,55 +1917,7 @@ static void rdp_loadblock()
setTBufTex(rdp.tiles[tile].t_mem, cnt); setTBufTex(rdp.tiles[tile].t_mem, cnt);
} }
/*
 * asmLoadTile - copy height rows of width 8-byte words from src+off to dst
 * with CopyBlock, exchanging the dwords of every second row with
 * SwapBlock32.
 *
 * src    : base address of the source
 * dst    : destination address
 * width  : number of 8-byte words per row
 * height : number of rows
 * line   : bytes added to the source offset after each row
 * off    : byte offset of the first row from src
 * end    : highest destination address at which a row may still start;
 *          the loop stops as soon as the write pointer is above it
 *
 * The first row is copied only, the second is copied and swapped, and so on
 * alternately.  The row counter is tested after it is decremented, so
 * height must be at least 1.
 *
 * Fixes relative to the previous body:
 *  - ecx (the per-row word count) is now saved around the SwapBlock32 call.
 *    SwapBlock32 counts ecx down to zero, and ecx is loaded only once before
 *    the loop, so every row after the first swapped one used to copy zero
 *    words.
 *  - the saved edi is discarded with "add esp,4" instead of "add sp,4".
 *    The 16-bit form changes only the low half of the stack pointer and
 *    does not carry into the upper half.
 */
void asmLoadTile(int src, int dst, int width, int height, int line, int off, int end) extern "C" void asmLoadTile(int src, int dst, int width, int height, int line, int off, int end);
{
_asm {
push ebx
push esi
push edi
; set initial values
mov edi,[dst]
mov ecx,[width]
mov esi,[src]
mov edx,[off]
xor ebx,ebx ; swap this line?
mov eax,[height]
loadtile_loop:
cmp [end],edi ; end of tmem: error
jc loadtile_end
; copy this line
// CopyBlock does not preserve ecx, so the word count is kept on the stack
push edi
push ecx
call CopyBlock
pop ecx
; swap it?
xor ebx,1
jnz loadtile_no_swap
; (ecx set, restore edi)
pop edi
// SwapBlock32 returns with ecx = 0; keep the word count for the next row
push ecx
call SwapBlock32
pop ecx
jmp loadtile_swap_end
loadtile_no_swap:
add esp,4 ; forget edi, we are already at the next position
loadtile_swap_end:
add edx,[line]
dec eax
jnz loadtile_loop
loadtile_end:
pop edi
pop esi
pop ebx
}
}
void LoadTile32b (wxUint32 tile, wxUint32 ul_s, wxUint32 ul_t, wxUint32 width, wxUint32 height); void LoadTile32b (wxUint32 tile, wxUint32 ul_s, wxUint32 ul_t, wxUint32 width, wxUint32 height);
static void rdp_loadtile() static void rdp_loadtile()
{ {

View File

@ -1,248 +0,0 @@
#include "Gfx #1.3.h"
/*****************************************************************
;
; ******** Textures conversion ********
;
;*****************************************************************/
/*
 * Converts ARGB1555 texels to ARGB4444, two 16-bit texels per 32-bit word.
 *
 *   in : a rrrrr ggggg bbbbb   (per 16-bit texel)
 *   out: aaaa rrrr gggg bbbb
 *
 * src   - address of the source words
 * dst   - address of the destination words (same number of words as src)
 * isize - number of 32-bit words to convert; values <= 0 convert nothing
 *
 * Written in plain C instead of 32-bit inline assembly so that it also
 * builds for x64 (where pointers do not fit in a 32-bit register) and so
 * that a zero count no longer wraps the loop counter.
 * Assumes `unsigned int` is 32 bits wide and that both addresses are
 * suitably aligned for 32-bit access - TODO confirm for non-x86 targets.
 */
void asmTexConv_ARGB1555_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int isize)
{
  const unsigned int *in = (const unsigned int *)src;
  unsigned int *out = (unsigned int *)dst;
  int i;

  for (i = 0; i < isize; i++)
  {
    const unsigned int v = in[i];

    /* Replicate the single alpha bit into all four alpha bits. */
    const unsigned int a = v & 0x80008000u;
    unsigned int texels = a | (a >> 1) | (a >> 2) | (a >> 3);

    /* Keep the top four bits of each 5-bit colour channel. */
    texels |= (v & 0x78007800u) >> 3;  /* red   */
    texels |= (v & 0x03c003c0u) >> 2;  /* green */
    texels |= (v & 0x001e001eu) >> 1;  /* blue  */

    out[i] = texels;
  }
}
/*
 * Converts AI88 texels to ARGB4444, two 16-bit texels per 32-bit word.
 *
 *   in : aaaaaaaa iiiiiiii     (per 16-bit texel)
 *   out: aaaa iiii iiii iiii   (top nibble of alpha, top nibble of
 *                               intensity copied to r, g and b)
 *
 * src   - address of the source words
 * dst   - address of the destination words (same number of words as src)
 * isize - number of 32-bit words to convert; values <= 0 convert nothing
 *
 * Written in plain C instead of 32-bit inline assembly so that it also
 * builds for x64 and so that a zero count no longer wraps the loop counter.
 * Assumes `unsigned int` is 32 bits wide and that both addresses are
 * suitably aligned for 32-bit access - TODO confirm for non-x86 targets.
 */
void asmTexConv_AI88_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int isize)
{
  const unsigned int *in = (const unsigned int *)src;
  unsigned int *out = (unsigned int *)dst;
  int i;

  for (i = 0; i < isize; i++)
  {
    const unsigned int v = in[i];
    const unsigned int alpha = v & 0xF000F000u;      /* already in the alpha slot */
    const unsigned int intensity = v & 0x00F000F0u;  /* sits in the green slot */

    /* Spread the intensity nibble over red (<<4), green and blue (>>4). */
    out[i] = alpha | (intensity << 4) | intensity | (intensity >> 4);
  }
}
/*
 * Expands two AI44 texels held in the low 16 bits of `pair`
 * (texel 1 in bits 8-15, texel 0 in bits 0-7) into two ARGB4444 texels
 * (texel 1 in the high half of the result, texel 0 in the low half).
 */
static unsigned int texconv_ai44_expand_pair(unsigned int pair)
{
  const unsigned int i1 = pair & 0x0F00u;  /* intensity nibble of texel 1 */
  const unsigned int i0 = pair & 0x000Fu;  /* intensity nibble of texel 0 */

  return ((pair & 0xFF00u) << 16) | (i1 << 12) | (i1 << 8)   /* a1 i1 i1 i1 */
       | ((pair & 0x00FFu) << 8)  | (i0 << 4)  | i0;         /* a0 i0 i0 i0 */
}

/*
 * Converts AI44 texels to ARGB4444. Each source word holds four 8-bit
 * texels and produces two destination words:
 *
 *   in : aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0
 *   out: aaaa1 iiii1 iiii1 iiii1 aaaa0 iiii0 iiii0 iiii0   (first word)
 *        aaaa3 iiii3 iiii3 iiii3 aaaa2 iiii2 iiii2 iiii2   (second word)
 *
 * src   - address of the source words
 * dst   - address of the destination words (2 * isize words are written)
 * isize - number of 32-bit source words; values <= 0 convert nothing
 *
 * Written in plain C instead of 32-bit inline assembly so that it also
 * builds for x64 and so that a zero count no longer wraps the loop counter.
 * Assumes `unsigned int` is 32 bits wide and that both addresses are
 * suitably aligned for 32-bit access - TODO confirm for non-x86 targets.
 */
void asmTexConv_AI44_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int isize)
{
  const unsigned int *in = (const unsigned int *)src;
  unsigned int *out = (unsigned int *)dst;
  int i;

  for (i = 0; i < isize; i++)
  {
    /* Read the source word before either store, as the assembly did. */
    const unsigned int v = *in++;

    *out++ = texconv_ai44_expand_pair(v & 0xFFFFu);  /* texels 1 and 0 */
    *out++ = texconv_ai44_expand_pair(v >> 16);      /* texels 3 and 2 */
  }
}
/*
 * Converts A8 texels to ARGB4444. Each source word holds four 8-bit
 * texels and produces two destination words. Only the top nibble of each
 * source byte is used; it is copied into all four 4-bit channels:
 *
 *   in : a3 a3' a2 a2' a1 a1' a0 a0'   (nibbles; the primed low nibbles are dropped)
 *   out: a1 a1 a1 a1 a0 a0 a0 a0       (first word)
 *        a3 a3 a3 a3 a2 a2 a2 a2       (second word)
 *
 * src   - address of the source words
 * dst   - address of the destination words (2 * isize words are written)
 * isize - number of 32-bit source words; values <= 0 convert nothing
 *
 * Written in plain C instead of 32-bit inline assembly so that it also
 * builds for x64 and so that a zero count no longer wraps the loop counter.
 * Assumes `unsigned int` is 32 bits wide and that both addresses are
 * suitably aligned for 32-bit access - TODO confirm for non-x86 targets.
 */
void asmTexConv_A8_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int isize)
{
  const unsigned int *in = (const unsigned int *)src;
  unsigned int *out = (unsigned int *)dst;
  int i;

  for (i = 0; i < isize; i++)
  {
    /* Read the source word before either store, as the assembly did. */
    const unsigned int v = *in++;

    /* Top nibble of each of the four source bytes. */
    const unsigned int n0 = (v >> 4) & 0xFu;
    const unsigned int n1 = (v >> 12) & 0xFu;
    const unsigned int n2 = (v >> 20) & 0xFu;
    const unsigned int n3 = v >> 28;

    /* Multiplying a nibble by 0x1111 repeats it in all four 4-bit slots. */
    *out++ = ((n1 * 0x1111u) << 16) | (n0 * 0x1111u);
    *out++ = ((n3 * 0x1111u) << 16) | (n2 * 0x1111u);
  }
}