Glide64: Redo the asm for textures

This commit is contained in:
zilmar 2013-04-17 20:32:38 +10:00
parent e928a5995d
commit 4cbd5c538d
12 changed files with 3879 additions and 3834 deletions

View File

@ -25,140 +25,53 @@
#include <stdlib.h>
#include "Ext_TxFilter.h"
/* Function-pointer types for the texture filter entry points that
 * ext_ghq_init resolves with DLSYM. */
/* NOTE(review): every typedef name below is declared again further down as an
 * extern "C" function. This looks like removed and added diff lines shown
 * together; confirm which set is current. */
typedef boolean (*txfilter_init)(int maxwidth, int maxheight, int maxbpp,
int options, int cachesize,
wchar_t *path, wchar_t *ident,
dispInfoFuncExt callback);
typedef void (*txfilter_shutdown)(void);
typedef boolean (*txfilter_filter)(unsigned char *src, int srcwidth, int srcheight, unsigned short srcformat,
uint64 g64crc, GHQTexInfo *info);
typedef boolean (*txfilter_hirestex)(uint64 g64crc, uint64 r_crc64, unsigned short *palette, GHQTexInfo *info);
typedef uint64 (*txfilter_checksum)(unsigned char *src, int width, int height, int size, int rowStride, unsigned char *palette);
typedef boolean (*txfilter_dmptx)(unsigned char *src, int width, int height, int rowStridePixel, unsigned short gfmt, unsigned short n64fmt, uint64 r_crc64);
typedef boolean (*txfilter_reloadhirestex)();
/* File-local table: the loaded library handle plus one slot per resolved
 * entry point. */
static struct {
TXHMODULE lib;
txfilter_init init;
txfilter_shutdown shutdown;
txfilter_filter filter;
txfilter_hirestex hirestex;
txfilter_checksum checksum;
txfilter_dmptx dmptx;
txfilter_reloadhirestex reloadhirestex;
} txfilter;
/* Direct declarations, with C linkage, of the same entry points; the
 * ext_ghq_* wrappers call these by name. */
extern "C" boolean txfilter_init(int maxwidth, int maxheight, int maxbpp, int options, int cachesize, wchar_t *path, wchar_t *ident, dispInfoFuncExt callback);
extern "C" void txfilter_shutdown(void);
extern "C" boolean txfilter(unsigned char *src, int srcwidth, int srcheight, unsigned short srcformat, uint64 g64crc, GHQTexInfo *info);
extern "C" boolean txfilter_hirestex(uint64 g64crc, uint64 r_crc64, unsigned short *palette, GHQTexInfo *info);
extern "C" uint64 txfilter_checksum(unsigned char *src, int width, int height, int size, int rowStride, unsigned char *palette);
extern "C" boolean txfilter_dmptx(unsigned char *src, int width, int height, int rowStridePixel, unsigned short gfmt, unsigned short n64fmt, uint64 r_crc64);
extern "C" boolean txfilter_reloadhirestex();
/* Shuts the texture filter down.
 * As written it (1) calls the resolved shutdown pointer when one is set,
 * (2) closes the library handle and zeroes the whole txfilter table when a
 * handle is held, and (3) calls txfilter_shutdown() directly. */
/* NOTE(review): steps 1-2 and step 3 look like the removed and the added
 * version of the body shown together; confirm which is current. */
void ext_ghq_shutdown(void)
{
if (txfilter.shutdown)
(*txfilter.shutdown)();
if (txfilter.lib) {
DLCLOSE(txfilter.lib);
// clears the handle and every cached function pointer in one go
memset(&txfilter, 0, sizeof(txfilter));
}
txfilter_shutdown();
}
/* Initialises the texture filter.
 * As written: loads the GlideHQ library from the directory given by path if
 * no handle is held yet, resolves each entry point that is still unset, then
 * calls the resolved init when init, shutdown, filter, hirestex and checksum
 * were all found; otherwise it calls ext_ghq_shutdown().
 * Returns the result of the resolved init, or 0 when it was not called. */
/* NOTE(review): the final return that calls txfilter_init directly sits
 * after return bRet and is unreachable as written; it looks like the added
 * diff line shown next to the removed body. Confirm which is current. */
boolean ext_ghq_init(int maxwidth, int maxheight, int maxbpp, int options, int cachesize,
wchar_t *path, wchar_t *ident,
dispInfoFuncExt callback)
{
boolean bRet = 0;
if (!txfilter.lib) {
wchar_t curpath[MAX_PATH];
// NOTE(review): wcscpy and wcscat below do not check the length of path
// against MAX_PATH.
wcscpy(curpath, path);
#ifdef WIN32
wcscat(curpath, L"\\GlideHQ.dll");
txfilter.lib = DLOPEN(curpath);
#else
char cbuf[MAX_PATH];
wcscat(curpath, L"/GlideHQ.so");
// the non-WIN32 branch converts the wide path to a multibyte string before DLOPEN
wcstombs(cbuf, curpath, MAX_PATH);
txfilter.lib = DLOPEN(cbuf);
#endif
}
if (txfilter.lib) {
// each slot is resolved only if it is still null
if (!txfilter.init)
txfilter.init = (txfilter_init)DLSYM(txfilter.lib, "txfilter_init");
if (!txfilter.shutdown)
txfilter.shutdown = (txfilter_shutdown)DLSYM(txfilter.lib, "txfilter_shutdown");
if (!txfilter.filter)
txfilter.filter = (txfilter_filter)DLSYM(txfilter.lib, "txfilter");
if (!txfilter.hirestex)
txfilter.hirestex = (txfilter_hirestex)DLSYM(txfilter.lib, "txfilter_hirestex");
if (!txfilter.checksum)
txfilter.checksum = (txfilter_checksum)DLSYM(txfilter.lib, "txfilter_checksum");
if (!txfilter.dmptx)
txfilter.dmptx = (txfilter_dmptx)DLSYM(txfilter.lib, "txfilter_dmptx");
if (!txfilter.reloadhirestex)
txfilter.reloadhirestex = (txfilter_reloadhirestex)DLSYM(txfilter.lib, "txfilter_reloadhirestex");
}
// dmptx and reloadhirestex are deliberately left out of the required set
if (txfilter.init && txfilter.shutdown && txfilter.filter &&
txfilter.hirestex && txfilter.checksum /*&& txfilter.dmptx && txfilter.reloadhirestex */)
bRet = (*txfilter.init)(maxwidth, maxheight, maxbpp, options, cachesize, path, ident, callback);
else
ext_ghq_shutdown();
return bRet;
return txfilter_init(maxwidth, maxheight, maxbpp, options, cachesize, path, ident, callback);
}
/* Forwards a texture to the filter entry point and returns its result.
 * As written, the resolved filter pointer is called when set (0 is returned
 * otherwise). */
/* NOTE(review): the second return, which calls txfilter() directly, is
 * unreachable as written; it looks like the added diff line shown next to
 * the removed body. Confirm which is current. */
boolean ext_ghq_txfilter(unsigned char *src, int srcwidth, int srcheight, unsigned short srcformat,
uint64 g64crc, GHQTexInfo *info)
{
boolean ret = 0;
if (txfilter.filter)
ret = (*txfilter.filter)(src, srcwidth, srcheight, srcformat,
g64crc, info);
return ret;
return txfilter(src, srcwidth, srcheight, srcformat, g64crc, info);;
}
/* Forwards the two checksums, palette and info pointer to the hirestex
 * entry point and returns its result. */
/* NOTE(review): ret is declared twice in this body (once initialised to 0
 * for the function-pointer path, once from the direct call). This looks like
 * removed and added diff lines shown together; confirm which is current. */
boolean ext_ghq_hirestex(uint64 g64crc, uint64 r_crc64, unsigned short *palette, GHQTexInfo *info)
{
boolean ret = 0;
if (txfilter.hirestex)
ret = (*txfilter.hirestex)(g64crc, r_crc64, palette, info);
boolean ret = txfilter_hirestex(g64crc, r_crc64, palette, info);
return ret;
}
/* Forwards the source buffer description and palette to the checksum entry
 * point and returns the 64-bit value it produces. */
/* NOTE(review): ret is declared twice in this body (function-pointer path
 * and direct call). This looks like removed and added diff lines shown
 * together; confirm which is current. */
uint64 ext_ghq_checksum(unsigned char *src, int width, int height, int size, int rowStride, unsigned char *palette)
{
uint64 ret = 0;
if (txfilter.checksum)
ret = (*txfilter.checksum)(src, width, height, size, rowStride, palette);
uint64 ret = txfilter_checksum(src, width, height, size, rowStride, palette);
return ret;
}
/* Forwards its arguments unchanged to the dmptx entry point and returns its
 * result. */
/* NOTE(review): ret is declared twice in this body (function-pointer path
 * and direct call). This looks like removed and added diff lines shown
 * together; confirm which is current. */
boolean ext_ghq_dmptx(unsigned char *src, int width, int height, int rowStridePixel, unsigned short gfmt, unsigned short n64fmt, uint64 r_crc64)
{
boolean ret = 0;
if (txfilter.dmptx)
ret = (*txfilter.dmptx)(src, width, height, rowStridePixel, gfmt, n64fmt, r_crc64);
boolean ret = txfilter_dmptx(src, width, height, rowStridePixel, gfmt, n64fmt, r_crc64);
return ret;
}
/* Calls the reloadhirestex entry point and returns its result. */
/* NOTE(review): ret is declared twice in this body (function-pointer path
 * and direct call). This looks like removed and added diff lines shown
 * together; confirm which is current. */
boolean ext_ghq_reloadhirestex()
{
boolean ret = 0;
if (txfilter.reloadhirestex)
ret = (*txfilter.reloadhirestex)();
boolean ret = txfilter_reloadhirestex();
return ret;
}

View File

@ -44,7 +44,7 @@
#define CHDIR(a) chdir(a)
#endif
#ifdef __MSC__
#ifdef WIN32
typedef __int64 int64;
typedef unsigned __int64 uint64;
typedef unsigned char boolean;

View File

@ -39,7 +39,6 @@
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="&quot;$(Root)Source/3rd Party/wx/include&quot;;&quot;$(Root)Source/3rd Party/wx/include/msvc&quot;;inc"
PreprocessorDefinitions="__MSC__;"
RuntimeLibrary="3"
UsePrecompiledHeader="0"
/>
<Tool
@ -102,7 +101,6 @@
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="&quot;$(Root)Source/3rd Party/wx/include&quot;;&quot;$(Root)Source/3rd Party/wx/include/msvc&quot;;inc"
PreprocessorDefinitions="__MSC__"
RuntimeLibrary="2"
UsePrecompiledHeader="0"
/>
<Tool
@ -236,10 +234,6 @@
RelativePath="TexCache.h"
>
</File>
<File
RelativePath=".\texConv.cpp"
>
</File>
<File
RelativePath="TexConv.h"
>
@ -248,34 +242,18 @@
RelativePath="TexLoad.h"
>
</File>
<File
RelativePath=".\TexLoad16b.cpp"
>
</File>
<File
RelativePath="TexLoad16b.h"
>
</File>
<File
RelativePath=".\TexLoad32b.cpp"
>
</File>
<File
RelativePath="TexLoad32b.h"
>
</File>
<File
RelativePath=".\TexLoad4b.cpp"
>
</File>
<File
RelativePath="TexLoad4b.h"
>
</File>
<File
RelativePath=".\TexLoad8b.cpp"
>
</File>
<File
RelativePath="TexLoad8b.h"
>
@ -289,7 +267,7 @@
>
</File>
<File
RelativePath="Texture.asm"
RelativePath=".\Texture.asm.cpp"
>
</File>
</Filter>

View File

@ -149,48 +149,7 @@ void ClearCache ()
//****************************************************************
// GetTexInfo - gets information for either t0 or t1, checks if in cache & fills tex_found
/*
 * Computes a 32-bit checksum over a block of texture memory.
 *
 * addr   - address of the first byte to read (used as a pointer)
 * width  - number of 8-byte groups read per row
 * height - number of rows
 * line   - byte count added to the read pointer after each row
 *
 * Per 8-byte group: the two dwords are added to the running value, the value
 * is multiplied by the current column counter, and the high half of the
 * 64-bit product is added to the low half. After each row the same
 * multiply-and-fold step is done with the current row counter.
 *
 * There is no return statement; the result is whatever the asm block leaves
 * in eax. Both loops test their counter at the bottom, so each body runs at
 * least once.
 */
int asmTextureCRC(int addr, int width, int height, int line)
{
_asm {
push ebx
push edi
xor eax,eax ; eax is final result
mov ebx,[line]
mov ecx,[height] ; ecx is height counter
mov edi,[addr] ; edi is ptr to texture memory
crc_loop_y:
push ecx
mov ecx,[width]
crc_loop_x:
add eax,[edi] ; MUST be 64-bit aligned, so manually unroll
add eax,[edi+4]
// mul leaves the 64-bit product in edx:eax; the high half is folded back in
mov edx,ecx
mul edx
add eax,edx
add edi,8
dec ecx
jnz crc_loop_x
pop ecx
// same fold, this time with the row counter
mov edx,ecx
mul edx
add eax,edx
add edi,ebx
dec ecx
jnz crc_loop_y
pop edi
pop ebx
}
}
extern "C" int asmTextureCRC(int addr, int width, int height, int line);
void GetTexInfo (int id, int tile)
{
FRDP (" | |-+ GetTexInfo (id: %d, tile: %d)\n", id, tile);
@ -1045,6 +1004,7 @@ void LoadTex (int id, int tmu)
// Get this cache object
cache = voodoo.tex_UMA?&rdp.cache[0][rdp.n_cached[0]]:&rdp.cache[tmu][rdp.n_cached[tmu]];
memset(cache,0,sizeof(*cache));
rdp.cur_cache[id] = cache;
rdp.cur_cache_n[id] = rdp.n_cached[tmu];

View File

@ -1,303 +0,0 @@
#include "Gfx #1.3.h"
/*****************************************************************
16b textures load
*****************************************************************/
/*****************************************************************
; Size: 2, Format: 0
;
; 2008.03.29 cleaned up - H.Morii
; 2009 ported to NASM - Sergey (Gonetz) Lipski
*/
/*
 * Copies a 16-bit texture from src to dst, converting each texel on the way.
 *
 * src    - address of the source data
 * dst    - address of the destination buffer
 * wid_64 - number of 8-byte groups per row
 * height - number of rows
 * line   - byte count added to the source position after each row
 * ext    - byte count added to the destination position after each row
 *
 * Per dword: bswap, then each 16-bit half is rotated right by one bit and the
 * two halves are exchanged (ror ax,1 / ror eax,16 / ror ax,1).
 * Rows are handled in pairs: in the second row of each pair the two dwords of
 * every 8-byte group are read in swapped order.
 * After each row the source position is recomputed as
 * src + ((position + line - src) & 0xFFF), so it wraps inside a 4096-byte
 * window starting at src.
 * The function returns after the first row of a pair if that was the last row.
 */
void asmLoad16bRGBA (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext)
{
_asm {
push ebx
push esi
push edi
mov esi,[src]
mov edi,[dst]
mov ecx,[height]
y_loop:
push ecx
mov ecx,[wid_64]
x_loop:
mov eax,[esi] ; read both pixels
mov ebx,[esi+4] ; read both pixels
bswap eax
bswap ebx
ror ax,1
ror bx,1
ror eax,16
ror ebx,16
ror ax,1
ror bx,1
mov [edi],eax
mov [edi+4],ebx
add esi,8
add edi,8
dec ecx
jnz x_loop
pop ecx
dec ecx
jz end_y_loop
push ecx
// step to the next source row, wrapping the offset from src to 12 bits
mov eax,esi
add eax,[line]
mov esi,[src]
sub eax, esi
and eax, 0xFFF
add esi, eax
add edi,[ext]
mov ecx,[wid_64]
x_loop_2:
// second row of the pair: the two dwords are fetched in swapped order
mov eax,[esi+4] ; read both pixels
mov ebx,[esi] ; read both pixels
bswap eax
bswap ebx
ror ax,1
ror bx,1
ror eax,16
ror ebx,16
ror ax,1
ror bx,1
mov [edi],eax
mov [edi+4],ebx
add esi,8
add edi,8
dec ecx
jnz x_loop_2
mov eax,esi
add eax,[line]
mov esi,[src]
sub eax, esi
and eax, 0xFFF
add esi, eax
add edi,[ext]
pop ecx
dec ecx
jnz y_loop
end_y_loop:
pop edi
pop esi
pop ebx
}
}
/****************************************************************
; Size: 2, Format: 3
;
; ** by Gugaman/Dave2001 **
;
; 2008.03.29 cleaned up - H.Morii
; 2009 ported to NASM - Sergey (Gonetz) Lipski
*/
/*
 * Copies a 16-bit texture from src to dst without changing the texel bits.
 *
 * src    - address of the source data
 * dst    - address of the destination buffer
 * wid_64 - number of 8-byte groups per row
 * height - number of rows
 * line   - byte count added to the source pointer after each row
 * ext    - byte count added to the destination pointer after each row
 *
 * Rows are handled in pairs: the first row of a pair is copied dword for
 * dword, the second row has the two dwords of every 8-byte group exchanged.
 * The source pointer is not wrapped here.
 */
void asmLoad16bIA (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext)
{
_asm {
push ebx
push esi
push edi
mov esi,[src]
mov edi,[dst]
mov ecx,[height]
y_loop:
push ecx
mov ecx,[wid_64]
x_loop:
mov eax,[esi] ; read both pixels
mov ebx,[esi+4] ; read both pixels
mov [edi],eax
mov [edi+4],ebx
add esi,8
add edi,8
dec ecx
jnz x_loop
pop ecx
dec ecx
jz end_y_loop
push ecx
add esi,[line]
add edi,[ext]
mov ecx,[wid_64]
x_loop_2:
// second row of the pair: dwords of each group are exchanged
mov eax,[esi+4] ; read both pixels
mov ebx,[esi] ; read both pixels
mov [edi],eax
mov [edi+4],ebx
add esi,8
add edi,8
dec ecx
jnz x_loop_2
add esi,[line]
add edi,[ext]
pop ecx
dec ecx
jnz y_loop
end_y_loop:
pop edi
pop esi
pop ebx
}
}
/*****************************************************************
;16b textures mirror/clamp/wrap
;*****************************************************************/
/*
 * Writes count 16-bit texels per row at start, taken from the row that begins
 * at tex, reading either forwards or mirrored.
 *
 * tex    - address of the current source row (advanced by full per row)
 * start  - address where writing begins
 * width  - bit tested against (width + x) to choose the mirrored path
 * height - number of rows
 * mask   - byte-offset mask applied to x*2
 * line   - byte count added to the write pointer after each row
 * full   - byte count added to tex after each row
 * count  - number of texels written per row
 *
 * For x = 0 .. count-1:
 *   if ((width + x) & width) is zero, the source is tex + ((x*2) & mask),
 *   otherwise it is tex + mask - ((x*2) & mask).
 * Both loops test at the bottom, so each body runs at least once.
 */
void asmMirror16bS (int tex, int start, int width, int height, int mask, int line, int full, int count)
{
_asm {
push ebx
push esi
push edi
mov edi,[start]
mov ecx,[height]
loop_y:
xor edx,edx
loop_x:
mov esi,[tex]
mov ebx,[width]
add ebx,edx
and ebx,[width]
jnz is_mirrored
mov eax,edx
shl eax,1
and eax,[mask]
add esi,eax
mov ax,[esi]
mov [edi],ax
add edi,2
jmp end_mirror_check
is_mirrored:
add esi,[mask]
mov eax,edx
shl eax,1
and eax,[mask]
sub esi,eax
mov ax,[esi]
mov [edi],ax
add edi,2
end_mirror_check:
inc edx
cmp edx,[count]
jne loop_x
add edi,[line]
// the tex argument itself is updated in place to point at the next row
mov eax,[tex]
add eax,[full]
mov [tex],eax
dec ecx
jnz loop_y
pop edi
pop esi
pop ebx
}
}
/*
 * Writes count 32-bit words per row at start, repeating the row that begins
 * at tex.
 *
 * tex    - address of the current source row (advanced by full per row)
 * start  - address where writing begins
 * height - number of rows
 * mask   - mask applied to x before it is scaled by 4
 * line   - byte count added to the write pointer after each row
 * full   - byte count added to tex after each row
 * count  - number of 32-bit words written per row
 *
 * For x = 0 .. count-1 the dword at tex + ((x & mask) * 4) is copied.
 * Note that data is moved as whole dwords here, so count and mask are in
 * dword units.
 */
void asmWrap16bS (int tex, int start, int height, int mask, int line, int full, int count)
{
_asm {
push ebx
push esi
push edi
mov edi,[start]
mov ecx,[height]
loop_y:
xor edx,edx
loop_x:
mov esi,[tex]
mov eax,edx
and eax,[mask]
shl eax,2
add esi,eax
mov eax,[esi]
mov [edi],eax
add edi,4
inc edx
cmp edx,[count]
jne loop_x
add edi,[line]
// the tex argument itself is updated in place to point at the next row
mov eax,[tex]
add eax,[full]
mov [tex],eax
dec ecx
jnz loop_y
pop edi
pop esi
pop ebx
}
}
/*
 * Fills count 16-bit texels per row with one value read from that row.
 *
 * tex      - address where writing begins
 * constant - address of the 16-bit value to replicate for the first row
 * height   - number of rows
 * line     - byte count added to the write pointer after each row
 * full     - byte count added to the constant pointer after each row
 * count    - number of texels written per row
 *
 * Both loops test at the bottom, so each body runs at least once.
 */
void asmClamp16bS (int tex, int constant, int height,int line, int full, int count)
{
_asm {
push ebx
push esi
push edi
mov esi,[constant]
mov edi,[tex]
mov ecx,[height]
y_loop:
mov ax,[esi]
mov edx,[count]
x_loop:
mov [edi],ax ; don't unroll or make dword, it may go into next line (doesn't have to be multiple of two)
add edi,2
dec edx
jnz x_loop
add esi,[full]
add edi,[line]
dec ecx
jnz y_loop
pop edi
pop esi
pop ebx
}
}

View File

@ -1,139 +0,0 @@
#include "Gfx #1.3.h"
/*****************************************************************
;32b textures mirror/clamp/wrap
;*****************************************************************/
/*
 * Writes count 32-bit texels per row at start, taken from the row that begins
 * at tex, reading either forwards or mirrored.
 *
 * tex    - address of the current source row (advanced by full per row)
 * start  - address where writing begins
 * width  - bit tested against (width + x) to choose the mirrored path
 * height - number of rows
 * mask   - byte-offset mask applied to x*4
 * line   - byte count added to the write pointer after each row
 * full   - byte count added to tex after each row
 * count  - number of texels written per row
 *
 * For x = 0 .. count-1:
 *   if ((width + x) & width) is zero, the source is tex + ((x*4) & mask),
 *   otherwise it is tex + mask - ((x*4) & mask).
 * Both loops test at the bottom, so each body runs at least once.
 */
void asmMirror32bS (int tex, int start, int width, int height, int mask, int line, int full, int count)
{
_asm {
push ebx
push esi
push edi
mov edi,[start]
mov ecx,[height]
loop_y:
xor edx,edx
loop_x:
mov esi,[tex]
mov ebx,[width]
add ebx,edx
and ebx,[width]
jnz is_mirrored
mov eax,edx
shl eax,2
and eax,[mask]
add esi,eax
mov eax,[esi]
mov [edi],eax
add edi,4
jmp end_mirror_check
is_mirrored:
add esi,[mask]
mov eax,edx
shl eax,2
and eax,[mask]
sub esi,eax
mov eax,[esi]
mov [edi],eax
add edi,4
end_mirror_check:
inc edx
cmp edx,[count]
jne loop_x
add edi,[line]
// the tex argument itself is updated in place to point at the next row
mov eax,[tex]
add eax,[full]
mov [tex],eax
dec ecx
jnz loop_y
pop edi
pop esi
pop ebx
}
}
/*
 * Writes count 32-bit texels per row at start, repeating the row that begins
 * at tex.
 *
 * tex    - address of the current source row (advanced by full per row)
 * start  - address where writing begins
 * height - number of rows
 * mask   - mask applied to x before it is scaled by 4
 * line   - byte count added to the write pointer after each row
 * full   - byte count added to tex after each row
 * count  - number of texels written per row
 *
 * For x = 0 .. count-1 the dword at tex + ((x & mask) * 4) is copied.
 */
void asmWrap32bS (int tex, int start, int height, int mask, int line, int full, int count)
{
_asm {
push ebx
push esi
push edi
mov edi,[start]
mov ecx,[height]
loop_y:
xor edx,edx
loop_x:
mov esi,[tex]
mov eax,edx
and eax,[mask]
shl eax,2
add esi,eax
mov eax,[esi]
mov [edi],eax
add edi,4
inc edx
cmp edx,[count]
jne loop_x
add edi,[line]
// the tex argument itself is updated in place to point at the next row
mov eax,[tex]
add eax,[full]
mov [tex],eax
dec ecx
jnz loop_y
pop edi
pop esi
pop ebx
}
}
/*
 * Fills count 32-bit texels per row with one value read from that row.
 *
 * tex      - address where writing begins
 * constant - address of the 32-bit value to replicate for the first row
 * height   - number of rows
 * line     - byte count added to the write pointer after each row
 * full     - byte count added to the constant pointer after each row
 * count    - number of texels written per row
 *
 * Both loops test at the bottom, so each body runs at least once.
 * NOTE(review): the trailing remark on the store line is worded for the
 * narrower variants; this store is already a dword.
 */
void asmClamp32bS (int tex, int constant, int height,int line, int full, int count)
{
_asm {
push ebx
push esi
push edi
mov esi,[constant]
mov edi,[tex]
mov ecx,[height]
y_loop:
mov eax,[esi]
mov edx,[count]
x_loop:
mov [edi],eax ; don't unroll or make dword, it may go into next line (doesn't have to be multiple of two)
add edi,4
dec edx
jnz x_loop
add esi,[full]
add edi,[line]
dec ecx
jnz y_loop
pop edi
pop esi
pop ebx
}
}

File diff suppressed because it is too large Load Diff

View File

@ -1,771 +0,0 @@
#include "Gfx #1.3.h"
/*****************************************************************
;8b textures load
;****************************************************************/
/*****************************************************************
; Size: 1, Format: 2
;
; 2008.03.29 cleaned up - H.Morii
; 2009 ported to NASM - Sergey (Gonetz) Lipski
*/
/*
 * Expands an 8-bit indexed texture to 16 bits per texel through a palette.
 *
 * src    - address of the source indices
 * dst    - address of the destination buffer
 * wid_64 - number of 8-byte source groups per row
 * height - number of rows
 * line   - byte count added to the source position after each row
 * ext    - byte count added to the destination position after each row
 * pal    - address of a table of 16-bit entries, indexed by source byte
 *
 * Per source dword (after bswap) each of the four bytes is turned into a
 * table offset (byte * 2, via shift and the 0x1FE mask), the 16-bit entry is
 * fetched and rotated right by one bit, and two entries are packed into each
 * output dword. Every 8 source bytes therefore produce 16 output bytes.
 *
 * Rows are handled in pairs. In the second row of a pair the two dwords of
 * every 8-byte group are read in swapped order, and the source position is
 * wrapped after every group as well as after every row:
 * position = src + ((position - src) & 0x7FF), a 2048-byte window.
 * ecx is reused as scratch for the packed result inside the x loops, which is
 * why the column counter is pushed and popped around each group.
 */
void asmLoad8bCI (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext, wxUIntPtr pal)
{
_asm {
push ebx
push esi
push edi
mov ebx,[pal]
mov esi,[src]
mov edi,[dst]
mov ecx,[height]
y_loop:
push ecx
mov ecx,[wid_64]
x_loop:
push ecx
mov eax,[esi] ; read all 4 pixels
bswap eax
add esi,4
mov edx,eax
; 1st dword output {
shr eax,15
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
mov eax,edx
shr eax,23
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
mov [edi],ecx
add edi,4
; }
; 2nd dword output {
mov eax,edx
shl eax,1
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
shr edx,7
and edx,0x1FE
mov cx,[ebx+edx]
ror cx,1
mov [edi],ecx
add edi,4
; }
; * copy
mov eax,[esi] ; read all 4 pixels
bswap eax
add esi,4
mov edx,eax
; 1st dword output {
shr eax,15
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
mov eax,edx
shr eax,23
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
mov [edi],ecx
add edi,4
; }
; 2nd dword output {
mov eax,edx
shl eax,1
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
shr edx,7
and edx,0x1FE
mov cx,[ebx+edx]
ror cx,1
mov [edi],ecx
add edi,4
; }
; *
pop ecx
dec ecx
jnz x_loop
pop ecx
dec ecx
jz near end_y_loop
push ecx
// step to the next source row, wrapping the offset from src to 11 bits
mov eax,esi
add eax,[line]
mov esi,[src]
sub eax,esi
and eax,0x7FF
add esi,eax
add edi,[ext]
mov ecx,[wid_64]
x_loop_2:
push ecx
// second row of the pair: the upper dword of the group is handled first
mov eax,[esi+4] ; read all 4 pixels
bswap eax
mov edx,eax
; 1st dword output {
shr eax,15
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
mov eax,edx
shr eax,23
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
mov [edi],ecx
add edi,4
; }
; 2nd dword output {
mov eax,edx
shl eax,1
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
shr edx,7
and edx,0x1FE
mov cx,[ebx+edx]
ror cx,1
mov [edi],ecx
add edi,4
; }
; * copy
mov eax,[esi] ; read all 4 pixels
bswap eax
// advance the source by 8 with the same 11-bit wrap
mov edx,esi
add edx,8
mov esi,[src]
sub edx,esi
and edx,0x7FF
add esi,edx
mov edx,eax
; 1st dword output {
shr eax,15
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
mov eax,edx
shr eax,23
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
mov [edi],ecx
add edi,4
; }
; 2nd dword output {
mov eax,edx
shl eax,1
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
shr edx,7
and edx,0x1FE
mov cx,[ebx+edx]
ror cx,1
mov [edi],ecx
add edi,4
; }
; *
pop ecx
dec ecx
jnz x_loop_2
mov eax,esi
add eax,[line]
mov esi,[src]
sub eax,esi
and eax,0x7FF
add esi,eax
add edi,[ext]
pop ecx
dec ecx
jnz y_loop
end_y_loop:
pop edi
pop esi
pop ebx
}
}
/*
 * Expands an 8-bit indexed texture to 16 bits per texel through a palette,
 * exchanging the two bytes of every palette entry.
 *
 * src    - address of the source indices
 * dst    - address of the destination buffer
 * wid_64 - number of 8-byte source groups per row
 * height - number of rows
 * line   - byte count added to the source pointer after each row
 * ext    - byte count added to the destination pointer after each row
 * pal    - address of a table of 16-bit entries, indexed by source byte
 *
 * Per source dword (after bswap) each of the four bytes is turned into a
 * table offset (byte * 2, via shift and the 0x1FE mask), the 16-bit entry is
 * fetched and rotated by 8 bits, and two entries are packed into each output
 * dword. Every 8 source bytes produce 16 output bytes.
 *
 * Rows are handled in pairs; in the second row of a pair the two dwords of
 * every 8-byte group are read in swapped order. The source pointer is not
 * wrapped here.
 * ecx is reused as scratch for the packed result inside the x loops, which is
 * why the column counter is pushed and popped around each group.
 */
void asmLoad8bIA8 (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext, wxUIntPtr pal)
{
_asm {
push ebx
push esi
push edi
mov ebx,[pal]
mov esi,[src]
mov edi,[dst]
mov ecx,[height]
y_loop:
push ecx
mov ecx,[wid_64]
x_loop:
push ecx
mov eax,[esi] ; read all 4 pixels
bswap eax
add esi,4
mov edx,eax
; 1st dword output {
shr eax,15
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
mov eax,edx
shr eax,23
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
mov [edi],ecx
add edi,4
; }
; 2nd dword output {
mov eax,edx
shl eax,1
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
shr edx,7
and edx,0x1FE
mov cx,[ebx+edx]
ror cx,8
mov [edi],ecx
add edi,4
; }
; * copy
mov eax,[esi] ; read all 4 pixels
bswap eax
add esi,4
mov edx,eax
; 1st dword output {
shr eax,15
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
mov eax,edx
shr eax,23
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
mov [edi],ecx
add edi,4
; }
; 2nd dword output {
mov eax,edx
shl eax,1
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
shr edx,7
and edx,0x1FE
mov cx,[ebx+edx]
ror cx,8
mov [edi],ecx
add edi,4
; }
; *
pop ecx
dec ecx
jnz x_loop
pop ecx
dec ecx
jz near end_y_loop
push ecx
add esi,[line]
add edi,[ext]
mov ecx,[wid_64]
x_loop_2:
push ecx
// second row of the pair: the upper dword of the group is handled first
mov eax,[esi+4] ; read all 4 pixels
bswap eax
mov edx,eax
; 1st dword output {
shr eax,15
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
mov eax,edx
shr eax,23
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
mov [edi],ecx
add edi,4
; }
; 2nd dword output {
mov eax,edx
shl eax,1
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
shr edx,7
and edx,0x1FE
mov cx,[ebx+edx]
ror cx,8
mov [edi],ecx
add edi,4
; }
; * copy
mov eax,[esi] ; read all 4 pixels
bswap eax
add esi,8
mov edx,eax
; 1st dword output {
shr eax,15
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
mov eax,edx
shr eax,23
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
mov [edi],ecx
add edi,4
; }
; 2nd dword output {
mov eax,edx
shl eax,1
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
shr edx,7
and edx,0x1FE
mov cx,[ebx+edx]
ror cx,8
mov [edi],ecx
add edi,4
; }
; *
pop ecx
dec ecx
jnz x_loop_2
add esi,[line]
add edi,[ext]
pop ecx
dec ecx
jnz y_loop
end_y_loop:
pop edi
pop esi
pop ebx
}
}
/*****************************************************************
; Size: 1, Format: 3
;
; ** by Gugaman **
;
; 2008.03.29 cleaned up - H.Morii
; 2009 ported to NASM - Sergey (Gonetz) Lipski
*/
/*
 * Copies an 8-bit texture from src to dst, exchanging the high and low
 * nibble of every byte.
 *
 * src    - address of the source data
 * dst    - address of the destination buffer
 * wid_64 - number of 8-byte groups per row
 * height - number of rows
 * line   - byte count added to the source pointer after each row
 * ext    - byte count added to the destination pointer after each row
 *
 * Per dword: ((x >> 4) & 0x0F0F0F0F) | ((x << 4) & 0xF0F0F0F0).
 * Rows are handled in pairs; in the second row of a pair the two dwords of
 * every 8-byte group are read in swapped order. The source pointer is not
 * wrapped here.
 */
void asmLoad8bIA4 (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext)
{
_asm {
push ebx
push esi
push edi
mov esi,[src]
mov edi,[dst]
mov ecx,[height]
y_loop:
push ecx
mov ecx,[wid_64]
x_loop:
mov eax,[esi] ; read all 4 pixels
mov edx,eax
shr eax,4 ;all alpha
shl edx,4
and eax,0x0F0F0F0F
and edx,0xF0F0F0F0
add esi,4
or eax,edx
mov [edi],eax ; save dword
add edi,4
mov eax,[esi] ; read all 4 pixels
mov edx,eax
shr eax,4 ;all alpha
shl edx,4
and eax,0x0F0F0F0F
and edx,0xF0F0F0F0
add esi,4
or eax,edx
mov [edi],eax ; save dword
add edi,4
; *
dec ecx
jnz x_loop
pop ecx
dec ecx
jz end_y_loop
push ecx
add esi,[line]
add edi,[ext]
mov ecx,[wid_64]
x_loop_2:
// second row of the pair: the upper dword of the group is handled first
mov eax,[esi+4] ; read both pixels
mov edx,eax
shr eax,4 ;all alpha
shl edx,4
and eax,0x0F0F0F0F
and edx,0xF0F0F0F0
or eax,edx
mov [edi],eax ;save dword
add edi,4
mov eax,[esi] ; read both pixels
add esi,8
mov edx,eax
shr eax,4 ;all alpha
shl edx,4
and eax,0x0F0F0F0F
and edx,0xF0F0F0F0
or eax,edx
mov [edi],eax ;save dword
add edi,4
; *
dec ecx
jnz x_loop_2
add esi,[line]
add edi,[ext]
pop ecx
dec ecx
jnz y_loop
end_y_loop:
pop edi
pop esi
pop ebx
}
}
/*****************************************************************
; Size: 1, Format: 4
;
; ** by Gugaman **
; 2009 ported to NASM - Sergey (Gonetz) Lipski
*/
/*
 * Copies an 8-bit texture from src to dst without changing the texel bits.
 *
 * src    - address of the source data
 * dst    - address of the destination buffer
 * wid_64 - number of 8-byte groups per row
 * height - number of rows
 * line   - byte count added to the source pointer after each row
 * ext    - byte count added to the destination pointer after each row
 *
 * Rows are handled in pairs: the first row of a pair is copied dword for
 * dword, the second row has the two dwords of every 8-byte group exchanged.
 * The source pointer is not wrapped here.
 *
 * NOTE(review): dst is typed int and wid_64 is typed wxUIntPtr here, the
 * reverse of the sibling loaders. Confirm against the declaration callers use
 * before changing it.
 */
void asmLoad8bI (wxUIntPtr src, int dst, wxUIntPtr wid_64, int height, int line, int ext)
{
_asm {
push ebx
push esi
push edi
mov esi,[src]
mov edi,[dst]
mov ecx,[height]
y_loop:
push ecx
mov ecx,[wid_64]
x_loop:
mov eax,[esi] ; read all 4 pixels
add esi,4
mov [edi],eax ; save dword
add edi,4
mov eax,[esi] ; read all 4 pixels
add esi,4
mov [edi],eax ; save dword
add edi,4
; *
dec ecx
jnz x_loop
pop ecx
dec ecx
jz end_y_loop
push ecx
add esi,[line]
add edi,[ext]
mov ecx,[wid_64]
x_loop_2:
// second row of the pair: the upper dword of the group is copied first
mov eax,[esi+4] ; read both pixels
mov [edi],eax ;save dword
add edi,4
mov eax,[esi] ; read both pixels
add esi,8
mov [edi],eax ;save dword
add edi,4
; *
dec ecx
jnz x_loop_2
add esi,[line]
add edi,[ext]
pop ecx
dec ecx
jnz y_loop
end_y_loop:
pop edi
pop esi
pop ebx
}
}
/*****************************************************************
;
; ******** Textures mirror/clamp/wrap ********
;
;*****************************************************************/
/*****************************************************************
;8b textures mirror/clamp/wrap
;*****************************************************************/
/*
 * Writes count 8-bit texels per row at start, taken from the row that begins
 * at tex, reading either forwards or mirrored.
 *
 * tex    - address of the current source row (advanced by full per row)
 * start  - address where writing begins
 * width  - bit tested against (width + x) to choose the mirrored path
 * height - number of rows
 * mask   - offset mask applied to x
 * line   - byte count added to the write pointer after each row
 * full   - byte count added to tex after each row
 * count  - number of texels written per row
 *
 * For x = 0 .. count-1:
 *   if ((width + x) & width) is zero, the source is tex + (x & mask),
 *   otherwise it is tex + mask - (x & mask).
 * Both loops test at the bottom, so each body runs at least once.
 */
void asmMirror8bS (int tex, int start, int width, int height, int mask, int line, int full, int count)
{
_asm{
push ebx
push esi
push edi
mov edi,[start]
mov ecx,[height]
loop_y:
xor edx,edx
loop_x:
mov esi,[tex]
mov ebx,[width]
add ebx,edx
and ebx,[width]
jnz is_mirrored
mov eax,edx
and eax,[mask]
add esi,eax
mov al,[esi]
mov [edi],al
inc edi
jmp end_mirror_check
is_mirrored:
add esi,[mask]
mov eax,edx
and eax,[mask]
sub esi,eax
mov al,[esi]
mov [edi],al
inc edi
end_mirror_check:
inc edx
cmp edx,[count]
jne loop_x
add edi,[line]
// the tex argument itself is updated in place to point at the next row
mov eax,[tex]
add eax,[full]
mov [tex],eax
dec ecx
jnz loop_y
pop edi
pop esi
pop ebx
}
}
/*
 * Writes count 32-bit words per row at start, repeating the row that begins
 * at tex.
 *
 * tex    - address of the current source row (advanced by full per row)
 * start  - address where writing begins
 * height - number of rows
 * mask   - mask applied to x before it is scaled by 4
 * line   - byte count added to the write pointer after each row
 * full   - byte count added to tex after each row
 * count  - number of 32-bit words written per row
 *
 * For x = 0 .. count-1 the dword at tex + ((x & mask) * 4) is copied.
 * Note that data is moved as whole dwords here, so count and mask are in
 * dword units.
 */
void asmWrap8bS (int tex, int start, int height, int mask, int line, int full, int count)
{
_asm {
push ebx
push esi
push edi
mov edi,[start]
mov ecx,[height]
loop_y:
xor edx,edx
loop_x:
mov esi,[tex]
mov eax,edx
and eax,[mask]
shl eax,2
add esi,eax
mov eax,[esi]
mov [edi],eax
add edi,4
inc edx
cmp edx,[count]
jne loop_x
add edi,[line]
// the tex argument itself is updated in place to point at the next row
mov eax,[tex]
add eax,[full]
mov [tex],eax
dec ecx
jnz loop_y
pop edi
pop esi
pop ebx
}
}
/*
 * Fills count 8-bit texels per row with one value read from that row.
 *
 * tex      - address where writing begins
 * constant - address of the byte to replicate for the first row
 * height   - number of rows
 * line     - byte count added to the write pointer after each row
 * full     - byte count added to the constant pointer after each row
 * count    - number of texels written per row
 *
 * Both loops test at the bottom, so each body runs at least once.
 */
void asmClamp8bS (int tex, int constant, int height,int line, int full, int count)
{
_asm {
push ebx
push esi
push edi
mov esi,[constant]
mov edi,[tex]
mov ecx,[height]
y_loop:
mov al,[esi]
mov edx,[count]
x_loop:
mov [edi],al ; don't unroll or make dword, it may go into next line (doesn't have to be multiple of two)
inc edi
dec edx
jnz x_loop
add esi,[full]
add edi,[line]
dec ecx
jnz y_loop
pop edi
pop esi
pop ebx
}
}

View File

@ -1,59 +0,0 @@
;/*
;* Glide64 - Glide video plugin for Nintendo 64 emulators.
;*
;* This program is free software; you can redistribute it and/or modify
;* it under the terms of the GNU General Public License as published by
;* the Free Software Foundation; either version 2 of the License, or
;* any later version.
;*
;* This program is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;* GNU General Public License for more details.
;*
;* You should have received a copy of the GNU General Public License
;* along with this program; if not, write to the Free Software
;* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
;*/
;
;****************************************************************
;
; Glide64 - Glide Plugin for Nintendo 64 emulators
; Project started on December 29th, 2001
;
; Authors:
; Dave2001, original author, founded the project in 2001, left it in 2002
; Gugaman, joined the project in 2002, left it in 2002
; Sergey 'Gonetz' Lipski, joined the project in 2002, main author since fall of 2002
; Hiroshi 'KoolSmoky' Morii, joined the project in 2007
;
;****************************************************************
;
; To modify Glide64:
; * Write your name and (optionally) your email in a comment next to your work, so I know who did it, and so that you can find the parts you modified when it comes time to send them to me.
; * Do NOT send me the whole project or file that you modified. Take out your modified code sections, and tell me where to put them. If people sent the whole thing, I would have many different versions, but no idea how to combine them all.
;
;****************************************************************
%include "inc/c32.mac"
segment .text
;****************************************************************
;
; ******** Load block/tile ********
;
;****************************************************************
; asmLoadBlock - procedure stub: declares six arguments and contains no
; instructions between the argument list and endproc.
; proc / arg / endproc are presumably macros supplied by inc/c32.mac, which
; this file includes - confirm there.
proc asmLoadBlock
CPU 586
%$src arg
%$dst arg
%$off arg
%$dxt arg
%$cnt arg
%$swp arg
endproc ;asmLoadBlock

File diff suppressed because it is too large Load Diff

View File

@ -48,33 +48,8 @@
#include "FBtoScreen.h"
#include "CRC.h"
/*****************************************************************
; SwapBlock - swaps every other 32-bit word at addr
;
; ecx = num_words -> 0
; edi = addr -> end of dest
;*****************************************************************/
/*
 * Register-based helper (naked, so it takes no C arguments and ends with its
 * own ret).
 *
 * In:  ecx = number of 8-byte words to process, edi = address of the first one
 * Out: ecx = 0, edi = address just past the last word processed
 *
 * For each 8-byte word the two dwords at [edi] and [edi+4] are exchanged.
 * Does nothing when ecx is 0. eax and ebx are saved and restored.
 */
__declspec(naked) void SwapBlock32 ()
{
_asm {
push eax
push ebx
// or sets the zero flag when ecx is 0, which skips the loop
or ecx,ecx
jz swapblock32_end
swapblock32_loop:
mov eax,[edi]
mov ebx,[edi+4]
mov [edi],ebx
mov [edi+4],eax
add edi,8
dec ecx
jnz swapblock32_loop
swapblock32_end:
pop ebx
pop eax
ret
}
}
extern "C" void SwapBlock32 ();
extern "C" void SwapBlock64 ();
const int NumOfFormats = 3;
SCREEN_SHOT_FORMAT ScreenShotFormats[NumOfFormats] = { {wxT("BMP"), wxT("bmp"), wxBITMAP_TYPE_BMP}, {wxT("PNG"), wxT("png"), wxBITMAP_TYPE_PNG}, {wxT("JPEG"), wxT("jpeg"), wxBITMAP_TYPE_JPEG} };
@ -1850,165 +1825,7 @@ void setTBufTex(wxUint16 t_mem, wxUint32 cnt)
}
}
/*****************************************************************
; CopyBlock - copies a block from base_addr+offset to dest_addr, while unswapping the
; data within.
;
; edi = dest_addr -> end of dest
; ecx = num_words
; esi = base_addr (preserved)
; edx = offset (preserved)
;*****************************************************************/
/*
 * Register-based helper (naked, so it takes no C arguments and ends with its
 * own ret). Copies ecx * 8 bytes, byte-swapping each source dword.
 *
 * In:  edi = destination address
 *      ecx = number of 8-byte words to copy
 *      esi = base address
 *      edx = byte offset from the base address
 * Out: edi = address just past the last byte written
 *      eax, ebx, esi and edx are saved and restored
 *      ecx is not restored on every path
 *
 * Returns at once when ecx is 0. Reading starts at esi + (edx & ~3).
 *  - If (edx & 3) is 0 the main loop runs directly: two dwords per iteration,
 *    each passed through bswap.
 *  - Otherwise the first dword is rotated so that its first (edx & 3) bytes
 *    are skipped and the remaining 4 - (edx & 3) bytes are stored one at a
 *    time; one more bswapped dword follows, ecx is decremented, and the main
 *    loop handles whatever words remain.
 *  - After the loop, if (edx & 3) is non-zero, the dword at
 *    esi + ((edx + ecx*8) & ~3) supplies the final (edx & 3) bytes.
 */
__declspec(naked) void CopyBlock ( void )
{
_asm {
push eax
push ebx
push esi
push edx
or ecx,ecx
jz near copyblock_end
push ecx
; first, set the source address and check if not on a dword boundary
push esi
push edx
mov ebx,edx
and edx,0FFFFFFFCh
add esi,edx
and ebx,3 ; ebx = # we DON'T need to copy
jz copyblock_copy
mov edx,4 ; ecx = # we DO need to copy
sub edx,ebx
; load the first word, accounting for swapping
mov eax,[esi]
add esi,4
copyblock_precopy_skip:
rol eax,8
dec ebx
jnz copyblock_precopy_skip
copyblock_precopy_copy:
rol eax,8
mov [edi],al
inc edi
dec edx
jnz copyblock_precopy_copy
mov eax,[esi]
add esi,4
bswap eax
mov [edi],eax
add edi,4
dec ecx ; 1 less word to copy
jz copyblock_postcopy
copyblock_copy:
mov eax,[esi]
bswap eax
mov [edi],eax
mov eax,[esi+4]
bswap eax
mov [edi+4],eax
add esi,8
add edi,8
dec ecx
jnz copyblock_copy
copyblock_postcopy:
// recover the original offset, base address and word count pushed on entry
pop edx
pop esi
pop ecx
; check again if on dword boundary
mov ebx,edx ; ebx = # we DO need to copy
and ebx,3
jz copyblock_end
shl ecx,3 ; ecx = num_words * 8
add edx,ecx
and edx,0FFFFFFFCh
add esi,edx
mov eax,[esi]
copyblock_postcopy_copy:
rol eax,8
mov [edi],al
inc edi
dec ebx
jnz copyblock_postcopy_copy
copyblock_end:
pop edx
pop esi
pop ebx
pop eax
ret
}
}
/*
 * Copies a block with CopyBlock and then calls a swap routine on selected
 * runs of 8-byte words inside the copied data.
 *
 * src - base address handed to CopyBlock in esi
 * dst - destination address
 * off - byte offset handed to CopyBlock in edx
 * dxt - value added to a running counter once per 8-byte word
 * cnt - number of 8-byte words
 * swp - address of the swap routine; it is called with ecx = number of words
 *       and edi = address of the first word of the run
 *
 * Second phase: starting again at dst, the running counter (initially 0)
 * grows by dxt for each word. While the counter is non-negative the position
 * advances by 8; once its sign bit is set, words are counted in ecx for as
 * long as it stays set, and then swp is called for that run. swp is called
 * one more time when the word count is exhausted.
 *
 * NOTE(review): the first dec eax in the second phase happens before any
 * zero test, so cnt == 0 would wrap instead of ending the loop. Confirm that
 * callers never pass 0.
 */
void asmLoadBlock(int src, int dst, int off, int dxt, int cnt, wxUIntPtr swp)
{
_asm {
push ebx
push esi
push edi
; copy the data
mov esi,[src]
mov edi,[dst]
mov ecx,[cnt]
mov edx,[off]
call CopyBlock
; now swap it
mov eax,[cnt] ; eax = count remaining
xor edx,edx ; edx = dxt counter
mov edi,[dst]
mov ebx,[dxt]
xor ecx,ecx ; ecx = how much to copy
dxt_test:
add edi,8
dec eax
jz end_dxt_test
add edx,ebx
jns dxt_test
dxt_s_test:
inc ecx
dec eax
jz end_dxt_test
add edx,ebx
js dxt_s_test
; swap this data (ecx set, dst set)
call [swp] ; (ecx reset to 0 after)
jmp dxt_test ; and repeat
end_dxt_test:
; swap any remaining data
call [swp]
pop edi
pop esi
pop ebx
}
}
extern "C" void asmLoadBlock(int src, int dst, int off, int dxt, int cnt, int swp);
void LoadBlock32b(wxUint32 tile, wxUint32 ul_s, wxUint32 ul_t, wxUint32 lr_s, wxUint32 dxt);
static void rdp_loadblock()
{
@ -2100,55 +1917,7 @@ static void rdp_loadblock()
setTBufTex(rdp.tiles[tile].t_mem, cnt);
}
/*
 * Copies a tile line by line with CopyBlock and swaps every second line with
 * SwapBlock32.
 *
 * src    - base address handed to CopyBlock in esi
 * dst    - destination address
 * width  - number of 8-byte words per line (handed to CopyBlock in ecx)
 * height - number of lines
 * line   - byte count added to the source offset after each line
 * off    - initial byte offset from src (handed to CopyBlock in edx)
 * end    - upper bound for the destination; copying stops as soon as the
 *          destination pointer is above this address
 *
 * ebx toggles between 0 and 1 once per line: the first line is left as
 * copied, the second is swapped, and so on.
 *
 * Fix: the no-swap path used to discard the saved edi with "add sp,4", which
 * adjusts only the low 16 bits of the stack pointer. If that addition carried
 * out of bit 15, the upper half of ESP was left unchanged and the stack
 * pointer became wrong. The full 32-bit register is adjusted now.
 */
void asmLoadTile(int src, int dst, int width, int height, int line, int off, int end)
{
_asm {
push ebx
push esi
push edi
; set initial values
mov edi,[dst]
mov ecx,[width]
mov esi,[src]
mov edx,[off]
xor ebx,ebx ; swap this line?
mov eax,[height]
loadtile_loop:
cmp [end],edi ; end of tmem: error
jc loadtile_end
; copy this line (CopyBlock advances edi and may clobber ecx)
push edi
push ecx
call CopyBlock
pop ecx
; swap it?
xor ebx,1
jnz loadtile_no_swap
; (ecx set, restore edi to the start of the line just copied)
pop edi
call SwapBlock32
jmp loadtile_swap_end
loadtile_no_swap:
add esp,4 ; forget the saved edi, we are already at the next position
loadtile_swap_end:
add edx,[line]
dec eax
jnz loadtile_loop
loadtile_end:
pop edi
pop esi
pop ebx
}
}
extern "C" void asmLoadTile(int src, int dst, int width, int height, int line, int off, int end);
void LoadTile32b (wxUint32 tile, wxUint32 ul_s, wxUint32 ul_t, wxUint32 width, wxUint32 height);
static void rdp_loadtile()
{

View File

@ -1,248 +0,0 @@
#include "Gfx #1.3.h"
/*****************************************************************
;
; ******** Textures conversion ********
;
;*****************************************************************/
/*
 * Converts 16-bit texels laid out as 1 alpha bit + three 5-bit channels into
 * 16-bit texels with four 4-bit fields, two texels per dword.
 *
 * src   - address of the source texels
 * dst   - address of the destination buffer
 * isize - number of dwords (texel pairs) to convert
 *
 * Per texel: the alpha bit is replicated into all four bits of the top
 * nibble, and the upper four bits of each 5-bit channel are kept.
 * The loop tests at the bottom, so the body runs at least once.
 * The trailing bit-pattern remarks use a 0x prefix but show one character per
 * bit of a single 16-bit texel, not hexadecimal digits.
 */
void asmTexConv_ARGB1555_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int isize)
{
_asm {
push ebx
push esi
push edi
mov esi,[src]
mov edi,[dst]
mov ecx,[isize]
tc1_loop:
mov eax,[esi]
add esi,4
; arrr rrgg gggb bbbb
; aaaa rrrr gggg bbbb
mov edx,eax
and eax,0x80008000
mov ebx,eax ; ebx = 0xa000000000000000
shr eax,1
or ebx,eax ; ebx = 0xaa00000000000000
shr eax,1
or ebx,eax ; ebx = 0xaaa0000000000000
shr eax,1
or ebx,eax ; ebx = 0xaaaa000000000000
mov eax,edx
and eax,0x78007800 ; eax = 0x0rrrr00000000000
shr eax,3 ; eax = 0x0000rrrr00000000
or ebx,eax ; ebx = 0xaaaarrrr00000000
mov eax,edx
and eax,0x03c003c0 ; eax = 0x000000gggg000000
shr eax,2 ; eax = 0x00000000gggg0000
or ebx,eax ; ebx = 0xaaaarrrrgggg0000
and edx,0x001e001e ; edx = 0x00000000000bbbb0
shr edx,1 ; edx = 0x000000000000bbbb
or ebx,edx ; ebx = 0xaaaarrrrggggbbbb
mov [edi],ebx
add edi,4
dec ecx
jnz tc1_loop
pop edi
pop esi
pop ebx
}
}
/*
 * Converts 16-bit texels made of an 8-bit alpha and an 8-bit intensity into
 * 16-bit texels with four 4-bit fields, two texels per dword.
 *
 * src   - address of the source texels
 * dst   - address of the destination buffer
 * isize - number of dwords (texel pairs) to convert
 *
 * Per texel: the upper nibble of the alpha byte stays in the top nibble, and
 * the upper nibble of the intensity byte is written to each of the other
 * three nibbles.
 * The loop tests at the bottom, so the body runs at least once.
 * The trailing bit-pattern remarks use a 0x prefix but show one character per
 * bit of a single 16-bit texel, not hexadecimal digits.
 */
void asmTexConv_AI88_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int isize)
{
_asm {
push ebx
push esi
push edi
mov esi,[src]
mov edi,[dst]
mov ecx,[isize]
tc1_loop:
mov eax,[esi]
add esi,4
; aaaa aaaa iiii iiii
; aaaa rrrr gggg bbbb
mov edx,eax
and eax,0xF000F000 ; eax = 0xaaaa000000000000
mov ebx,eax ; ebx = 0xaaaa000000000000
and edx,0x00F000F0 ; edx = 0x00000000iiii0000
shl edx,4 ; edx = 0x0000iiii00000000
or ebx,edx ; ebx = 0xaaaaiiii00000000
shr edx,4 ; edx = 0x00000000iiii0000
or ebx,edx ; ebx = 0xaaaaiiiiiiii0000
shr edx,4 ; edx = 0x000000000000iiii
or ebx,edx ; ebx = 0xaaaaiiiiiiiiiiii
mov [edi],ebx
add edi,4
dec ecx
jnz tc1_loop
pop edi
pop esi
pop ebx
}
}
/*
 * Converts 8-bit texels made of a 4-bit alpha and a 4-bit intensity into
 * 16-bit texels with four 4-bit fields.
 *
 * src   - address of the source texels
 * dst   - address of the destination buffer
 * isize - number of source dwords to convert
 *
 * Each source dword holds four texels and produces two output dwords:
 * texels 0 and 1 go to the first, texels 2 and 3 to the second. Per texel
 * the alpha nibble stays on top and the intensity nibble is written to each
 * of the other three nibbles.
 * The loop tests at the bottom, so the body runs at least once.
 *
 * NOTE(review): the trailing remark on the shr edx,8 line lists aaaa3 twice;
 * the value shifted in there is the aaaa3 iiii3 byte.
 */
void asmTexConv_AI44_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int isize)
{
_asm {
push ebx
push esi
push edi
mov esi,[src]
mov edi,[dst]
mov ecx,[isize]
tc1_loop:
mov eax,[esi]
add esi,4
; aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0
; aaaa1 rrrr1 gggg1 bbbb1 aaaa0 rrrr0 gggg0 bbbb0
; aaaa3 rrrr3 gggg3 bbbb3 aaaa2 rrrr2 gggg2 bbbb2
mov edx,eax ; eax = aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0
shl eax,16 ; eax = aaaa1 iiii1 aaaa0 iiii0 0000 0000 0000 0000
and eax,0xFF000000 ; eax = aaaa1 iiii1 0000 0000 0000 0000 0000 0000
mov ebx,eax ; ebx = aaaa1 iiii1 0000 0000 0000 0000 0000 0000
and eax,0x0F000000 ; eax = 0000 iiii1 0000 0000 0000 0000 0000 0000
shr eax,4 ; eax = 0000 0000 iiii1 0000 0000 0000 0000 0000
or ebx,eax ; ebx = aaaa1 iiii1 iiii1 0000 0000 0000 0000 0000
shr eax,4 ; eax = 0000 0000 0000 iiii1 0000 0000 0000 0000
or ebx,eax ; ebx = aaaa1 iiii1 iiii1 iiii1 0000 0000 0000 0000
mov eax,edx ; eax = aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0
shl eax,8 ; eax = aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0 0000 0000
and eax,0x0000FF00 ; eax = 0000 0000 0000 0000 aaaa0 iiii0 0000 0000
or ebx,eax ; ebx = aaaa1 iiii1 iiii1 iiii1 aaaa0 iiii0 0000 0000
and eax,0x00000F00 ; eax = 0000 0000 0000 0000 0000 iiii0 0000 0000
shr eax,4 ; eax = 0000 0000 0000 0000 0000 0000 iiii0 0000
or ebx,eax ; ebx = aaaa1 iiii1 iiii1 iiii1 aaaa0 iiii0 iiii0 0000
shr eax,4 ; eax = 0000 0000 0000 0000 0000 0000 0000 iiii0
or ebx,eax ; ebx = aaaa1 iiii1 iiii1 iiii1 aaaa0 iiii0 iiii0 iiii0
mov [edi],ebx
add edi,4
mov eax,edx ; eax = aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0
and eax,0xFF000000 ; eax = aaaa3 iiii3 0000 0000 0000 0000 0000 0000
mov ebx,eax ; ebx = aaaa3 iiii3 0000 0000 0000 0000 0000 0000
and eax,0x0F000000 ; eax = 0000 iiii3 0000 0000 0000 0000 0000 0000
shr eax,4 ; eax = 0000 0000 iiii3 0000 0000 0000 0000 0000
or ebx,eax ; ebx = aaaa3 iiii3 iiii3 0000 0000 0000 0000 0000
shr eax,4 ; eax = 0000 0000 0000 iiii3 0000 0000 0000 0000
or ebx,eax ; ebx = aaaa3 iiii3 iiii3 iiii3 0000 0000 0000 0000
; edx = aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0
shr edx,8 ; edx = 0000 0000 aaaa3 aaaa3 aaaa2 iiii2 aaaa1 iiii1
and edx,0x0000FF00 ; edx = 0000 0000 0000 0000 aaaa2 iiii2 0000 0000
or ebx,edx ; ebx = aaaa3 iiii3 iiii3 iiii3 aaaa2 iiii2 0000 0000
and edx,0x00000F00 ; edx = 0000 0000 0000 0000 0000 iiii2 0000 0000
shr edx,4 ; edx = 0000 0000 0000 0000 0000 0000 iiii2 0000
or ebx,edx ; ebx = aaaa3 iiii3 iiii3 iiii3 aaaa2 iiii2 iiii2 0000
shr edx,4 ; edx = 0000 0000 0000 0000 0000 0000 0000 iiii2
or ebx,edx ; ebx = aaaa3 iiii3 iiii3 iiii3 aaaa2 iiii2 iiii2 iiii2
mov [edi],ebx
add edi,4
dec ecx
jnz tc1_loop
pop edi
pop esi
pop ebx
}
}
/*
 * Converts 8-bit texels into 16-bit texels with four 4-bit fields.
 *
 * src   - address of the source texels
 * dst   - address of the destination buffer
 * isize - number of source dwords to convert
 *
 * Each source dword holds four texels and produces two output dwords:
 * texels 0 and 1 go to the first, texels 2 and 3 to the second. Per texel
 * the upper nibble of the source byte is written to all four nibbles of the
 * 16-bit result.
 * The loop tests at the bottom, so the body runs at least once.
 *
 * NOTE(review): the trailing remarks on the two lines that mask and shift
 * edx for a2 say eax; the instructions operate on edx.
 */
void asmTexConv_A8_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int isize)
{
_asm {
push ebx
push esi
push edi
mov esi,[src]
mov edi,[dst]
mov ecx,[isize]
tc1_loop:
mov eax,[esi]
add esi,4
; aaaa3 aaaa3 aaaa2 aaaa2 aaaa1 aaaa1 aaaa0 aaaa0
; aaaa1 rrrr1 gggg1 bbbb1 aaaa0 rrrr0 gggg0 bbbb0
; aaaa3 rrrr3 gggg3 bbbb3 aaaa2 rrrr2 gggg2 bbbb2
mov edx,eax
and eax,0x0000F000 ; eax = 00 00 00 00 a1 00 00 00
shl eax,16 ; eax = a1 00 00 00 00 00 00 00
mov ebx,eax ; ebx = a1 00 00 00 00 00 00 00
shr eax,4
or ebx,eax ; ebx = a1 a1 00 00 00 00 00 00
shr eax,4
or ebx,eax ; ebx = a1 a1 a1 00 00 00 00 00
shr eax,4
or ebx,eax ; ebx = a1 a1 a1 a1 00 00 00 00
mov eax,edx
and eax,0x000000F0 ; eax = 00 00 00 00 00 00 a0 00
shl eax,8 ; eax = 00 00 00 00 a0 00 00 00
or ebx,eax
shr eax,4
or ebx,eax
shr eax,4
or ebx,eax
shr eax,4
or ebx,eax ; ebx = a1 a1 a1 a1 a0 a0 a0 a0
mov [edi],ebx
add edi,4
mov eax,edx ; eax = a3 a3 a2 a2 a1 a1 a0 a0
and eax,0xF0000000 ; eax = a3 00 00 00 00 00 00 00
mov ebx,eax ; ebx = a3 00 00 00 00 00 00 00
shr eax,4
or ebx,eax ; ebx = a3 a3 00 00 00 00 00 00
shr eax,4
or ebx,eax ; ebx = a3 a3 a3 00 00 00 00 00
shr eax,4
or ebx,eax ; ebx = a3 a3 a3 a3 00 00 00 00
and edx,0x00F00000 ; eax = 00 00 a2 00 00 00 00 00
shr edx,8 ; eax = 00 00 00 00 a2 00 00 00
or ebx,edx
shr edx,4
or ebx,edx
shr edx,4
or ebx,edx
shr edx,4
or ebx,edx ; ebx = a3 a3 a3 a3 a2 a2 a2 a2
mov [edi],ebx
add edi,4
dec ecx
jnz tc1_loop
pop edi
pop esi
pop ebx
}
}