[Glide64] Move the code out of texture.asm.cpp

This commit is contained in:
zilmar 2015-10-13 15:54:52 +11:00
parent d7a19c265f
commit 48554d2ad0
10 changed files with 3538 additions and 3886 deletions

View File

@ -270,10 +270,6 @@
RelativePath="TexModCI.h"
>
</File>
<File
RelativePath=".\Texture.asm.cpp"
>
</File>
</Filter>
<Filter
Name="Config"

View File

@ -41,9 +41,156 @@
//
//****************************************************************
extern "C" void asmMirror32bS (int tex, int start, int width, int height, int mask, int line, int full, int count);
extern "C" void asmWrap32bS (int tex, int start, int height, int mask, int line, int full, int count);
extern "C" void asmClamp32bS (int tex, int constant, int height,int line, int full, int count);
// asmMirror32bS - writes 'height' rows of 'count' 32-bit texels starting at 'start'.
// For column x the texel is read from the current source row at 'tex':
//   forwards  (tex + ((x*4) & mask))          when ((width + x) & width) == 0
//   backwards (tex + mask - ((x*4) & mask))   otherwise
// After each row 'line' bytes are added to the destination pointer and 'full' bytes to 'tex'.
// Naked function: it builds its own ebp frame and saves/restores ebx, esi, edi.
// Both loops are bottom-tested, so count == 0 or height == 0 is not handled.
extern "C" void __declspec(naked) asmMirror32bS (int tex, int start, int width, int height, int mask, int line, int full, int count)
{
_asm {
align 4
push ebp
mov ebp, esp
push ebx
push esi
push edi
mov edi,[start] // edi = destination write pointer
mov ecx,[height] // ecx = rows remaining
loop_y:
xor edx,edx // edx = column index x within this row
loop_x:
mov esi,[tex] // esi = start of the current source row
mov ebx,[width]
add ebx,edx
and ebx,[width] // non-zero selects the mirrored (reversed) read
jnz is_mirrored
mov eax,edx
shl eax,2 // x * 4 = byte offset of a 32-bit texel
and eax,[mask]
add esi,eax
mov eax,[esi]
mov [edi],eax
add edi,4
jmp end_mirror_check
is_mirrored:
add esi,[mask] // read from offset mask - ((x*4) & mask)
mov eax,edx
shl eax,2
and eax,[mask]
sub esi,eax
mov eax,[esi]
mov [edi],eax
add edi,4
end_mirror_check:
inc edx
cmp edx,[count]
jne loop_x
add edi,[line] // advance the destination by 'line' bytes after the row
mov eax,[tex]
add eax,[full]
mov [tex],eax // advance the source row pointer (kept in the argument slot)
dec ecx
jnz loop_y
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret
}
}
// asmWrap32bS - writes 'height' rows of 'count' 32-bit texels starting at 'start'.
// Column x is read from tex + ((x & mask) << 2), i.e. the column index is masked
// before it is scaled to a byte offset.
// After each row 'line' bytes are added to the destination pointer and 'full' bytes to 'tex'.
// Naked function: it builds its own ebp frame and saves/restores ebx, esi, edi.
// Both loops are bottom-tested, so count == 0 or height == 0 is not handled.
extern "C" void __declspec(naked) asmWrap32bS (int tex, int start, int height, int mask, int line, int full, int count)
{
_asm {
align 4
push ebp
mov ebp, esp
push ebx
push esi
push edi
mov edi,[start] // edi = destination write pointer
mov ecx,[height] // ecx = rows remaining
loop_y:
xor edx,edx // edx = column index x
loop_x:
mov esi,[tex] // esi = start of the current source row
mov eax,edx
and eax,[mask] // wrap the column index
shl eax,2 // convert to a byte offset (4 bytes per texel)
add esi,eax
mov eax,[esi]
mov [edi],eax
add edi,4
inc edx
cmp edx,[count]
jne loop_x
add edi,[line] // advance the destination by 'line' bytes after the row
mov eax,[tex]
add eax,[full]
mov [tex],eax // advance the source row pointer (kept in the argument slot)
dec ecx
jnz loop_y
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret
}
}
// asmClamp32bS - for each of 'height' rows, reads one dword from 'constant' and
// stores it 'count' times starting at 'tex'.
// After each row 'full' bytes are added to the source pointer and 'line' bytes to
// the destination pointer.
// Naked function: it builds its own ebp frame and saves/restores ebx, esi, edi.
// Both loops are bottom-tested, so count == 0 or height == 0 is not handled.
extern "C" void __declspec(naked) asmClamp32bS (int tex, int constant, int height,int line, int full, int count)
{
_asm {
align 4
push ebp
mov ebp, esp
push ebx
push esi
push edi
mov esi,[constant] // esi = address of the value to replicate for this row
mov edi,[tex] // edi = destination write pointer
mov ecx,[height] // ecx = rows remaining
y_loop:
mov eax,[esi] // eax = value replicated across this row
mov edx,[count] // edx = dwords left to write in this row
x_loop:
mov [edi],eax // don't unroll or make dword, it may go into next line (doesn't have to be multiple of two)
add edi,4
dec edx
jnz x_loop
add esi,[full]
add edi,[line]
dec ecx
jnz y_loop
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret
}
}
//****************************************************************
// 32-bit Horizontal Mirror

View File

@ -40,9 +40,155 @@
//****************************************************************
// 8-bit Horizontal Mirror
extern "C" void asmMirror8bS (int tex, int start, int width, int height, int mask, int line, int full, int count);
extern "C" void asmWrap8bS (int tex, int start, int height, int mask, int line, int full, int count);
extern "C" void asmClamp8bS (int tex, int constant, int height,int line, int full, int count);
// asmMirror8bS - byte-sized counterpart of the 32-bit mirror routine: writes 'height'
// rows of 'count' bytes starting at 'start'.
// For column x the byte is read from the current source row at 'tex':
//   forwards  (tex + (x & mask))          when ((width + x) & width) == 0
//   backwards (tex + mask - (x & mask))   otherwise
// After each row 'line' bytes are added to the destination pointer and 'full' bytes to 'tex'.
// Naked function: it builds its own ebp frame and saves/restores ebx, esi, edi.
// Both loops are bottom-tested, so count == 0 or height == 0 is not handled.
extern "C" void __declspec(naked) asmMirror8bS (int tex, int start, int width, int height, int mask, int line, int full, int count)
{
_asm{
ALIGN 4
push ebp
mov ebp, esp
push ebx
push esi
push edi
mov edi,[start] // edi = destination write pointer
mov ecx,[height] // ecx = rows remaining
loop_y:
xor edx,edx // edx = column index x within this row
loop_x:
mov esi,[tex] // esi = start of the current source row
mov ebx,[width]
add ebx,edx
and ebx,[width] // non-zero selects the mirrored (reversed) read
jnz is_mirrored
mov eax,edx
and eax,[mask]
add esi,eax
mov al,[esi]
mov [edi],al
inc edi
jmp end_mirror_check
is_mirrored:
add esi,[mask] // read from offset mask - (x & mask)
mov eax,edx
and eax,[mask]
sub esi,eax
mov al,[esi]
mov [edi],al
inc edi
end_mirror_check:
inc edx
cmp edx,[count]
jne loop_x
add edi,[line] // advance the destination by 'line' bytes after the row
mov eax,[tex]
add eax,[full]
mov [tex],eax // advance the source row pointer (kept in the argument slot)
dec ecx
jnz loop_y
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret
}
}
// asmWrap8bS - writes 'height' rows of 'count' dwords starting at 'start'.
// Iteration x reads the dword at tex + ((x & mask) << 2) and stores it at the
// destination, which then advances by 4 bytes.
// After each row 'line' bytes are added to the destination pointer and 'full' bytes to 'tex'.
// NOTE(review): unlike asmMirror8bS/asmClamp8bS this routine moves whole dwords, not
// single bytes - presumably the caller passes 'mask' and 'count' in dword units; confirm
// against the C++ wrapper before changing it.
// Naked function: it builds its own ebp frame and saves/restores ebx, esi, edi.
// Both loops are bottom-tested, so count == 0 or height == 0 is not handled.
extern "C" void __declspec(naked) asmWrap8bS (int tex, int start, int height, int mask, int line, int full, int count)
{
_asm {
align 4
push ebp
mov ebp, esp
push ebx
push esi
push edi
mov edi,[start] // edi = destination write pointer
mov ecx,[height] // ecx = rows remaining
loop_y:
xor edx,edx // edx = iteration index x
loop_x:
mov esi,[tex] // esi = start of the current source row
mov eax,edx
and eax,[mask] // wrap the index
shl eax,2 // index * 4 = byte offset of the dword
add esi,eax
mov eax,[esi]
mov [edi],eax
add edi,4
inc edx
cmp edx,[count]
jne loop_x
add edi,[line] // advance the destination by 'line' bytes after the row
mov eax,[tex]
add eax,[full]
mov [tex],eax // advance the source row pointer (kept in the argument slot)
dec ecx
jnz loop_y
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret
}
}
// asmClamp8bS - for each of 'height' rows, reads one byte from 'constant' and
// stores it 'count' times starting at 'tex'.
// After each row 'full' bytes are added to the source pointer and 'line' bytes to
// the destination pointer.
// Naked function: it builds its own ebp frame and saves/restores ebx, esi, edi.
// Both loops are bottom-tested, so count == 0 or height == 0 is not handled.
extern "C" void __declspec(naked) asmClamp8bS (int tex, int constant, int height,int line, int full, int count)
{
_asm {
align 4
push ebp
mov ebp, esp
push ebx
push esi
push edi
mov esi,[constant] // esi = address of the byte to replicate for this row
mov edi,[tex] // edi = destination write pointer
mov ecx,[height] // ecx = rows remaining
y_loop:
mov al,[esi] // al = value replicated across this row
mov edx,[count] // edx = bytes left to write in this row
x_loop:
mov [edi],al // don't unroll or make dword, it may go into next line (doesn't have to be multiple of two)
inc edi
dec edx
jnz x_loop
add esi,[full]
add edi,[line]
dec ecx
jnz y_loop
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret
}
}
void Mirror8bS (wxUint32 tex, wxUint32 mask, wxUint32 max_width, wxUint32 real_width, wxUint32 height)
{

View File

@ -150,7 +150,55 @@ void ClearCache ()
//****************************************************************
// GetTexInfo - gets information for either t0 or t1, checks if in cache & fills tex_found
extern "C" int asmTextureCRC(int addr, int width, int height, int line);
// asmTextureCRC - computes a 32-bit checksum over a block of texture memory and
// returns it in eax.
//   addr   - start of the data
//   width  - number of 8-byte units processed per row
//   height - number of rows
//   line   - bytes added to the read pointer after each row
// Per 8-byte unit: the two dwords are added to the running sum, the sum is multiplied
// by the remaining-unit counter, and the high half of the 64-bit product is added back.
// The same multiply/fold step is applied once per row with the remaining-row counter.
// Naked function: it builds its own ebp frame and saves/restores ebx and edi.
// Both loops are bottom-tested, so width == 0 or height == 0 is not handled.
extern "C" __declspec(naked) int asmTextureCRC(int addr, int width, int height, int line)
{
_asm {
align 4
push ebp
mov ebp, esp
push ebx
push edi
xor eax,eax // eax is final result
mov ebx,[line]
mov ecx,[height] // ecx is height counter
mov edi,[addr] // edi is ptr to texture memory
crc_loop_y:
push ecx // keep the row counter while ecx counts units in the row
mov ecx,[width]
crc_loop_x:
add eax,[edi] // MUST be 64-bit aligned, so manually unroll
add eax,[edi+4]
mov edx,ecx
mul edx // edx:eax = eax * remaining-unit counter
add eax,edx // fold the high half of the product into the sum
add edi,8
dec ecx
jnz crc_loop_x
pop ecx
mov edx,ecx
mul edx // edx:eax = eax * remaining-row counter
add eax,edx
add edi,ebx // skip 'line' bytes to the next row
dec ecx
jnz crc_loop_y
pop edi
pop ebx
mov esp, ebp
pop ebp
ret
}
}
void GetTexInfo (int id, int tile)
{
FRDP (" | |-+ GetTexInfo (id: %d, tile: %d)\n", id, tile);

View File

@ -37,10 +37,270 @@
//
//****************************************************************
extern "C" void asmTexConv_ARGB1555_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int size);
extern "C" void asmTexConv_AI88_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int size);
extern "C" void asmTexConv_AI44_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int size);
extern "C" void asmTexConv_A8_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int size);
// asmTexConv_ARGB1555_ARGB4444 - converts 'isize' dwords from 'src' to 'dst'.
// Each dword holds two 16-bit ARGB1555 pixels; both halves are converted in parallel
// to ARGB4444 using masks that cover both 16-bit lanes:
//   alpha: the single bit is replicated into four bits
//   red / green / blue: the top four of the five bits are kept and shifted into place
// The bit-pattern comments below describe one 16-bit lane.
// Naked function: it builds its own ebp frame and saves/restores ebx, esi, edi.
// The loop is bottom-tested, so isize == 0 is not handled.
extern "C" void __declspec(naked) asmTexConv_ARGB1555_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int isize)
{
_asm {
align 4
push ebp
mov ebp, esp
push ebx
push esi
push edi
mov esi,[src]
mov edi,[dst]
mov ecx,[isize] // ecx = dwords (pixel pairs) remaining
tc1_loop:
mov eax,[esi]
add esi,4
// arrr rrgg gggb bbbb
// aaaa rrrr gggg bbbb
mov edx,eax // edx keeps the unmodified source pair
and eax,0x80008000
mov ebx,eax // ebx = 0xa000000000000000
shr eax,1
or ebx,eax // ebx = 0xaa00000000000000
shr eax,1
or ebx,eax // ebx = 0xaaa0000000000000
shr eax,1
or ebx,eax // ebx = 0xaaaa000000000000
mov eax,edx
and eax,0x78007800 // eax = 0x0rrrr00000000000
shr eax,3 // eax = 0x0000rrrr00000000
or ebx,eax // ebx = 0xaaaarrrr00000000
mov eax,edx
and eax,0x03c003c0 // eax = 0x000000gggg000000
shr eax,2 // eax = 0x00000000gggg0000
or ebx,eax // ebx = 0xaaaarrrrgggg0000
and edx,0x001e001e // edx = 0x00000000000bbbb0
shr edx,1 // edx = 0x000000000000bbbb
or ebx,edx // ebx = 0xaaaarrrrggggbbbb
mov [edi],ebx
add edi,4
dec ecx
jnz tc1_loop
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret
}
}
// asmTexConv_AI88_ARGB4444 - converts 'isize' dwords from 'src' to 'dst'.
// Each dword holds two 16-bit alpha/intensity pixels (8 bits each); both halves are
// converted in parallel. The top four alpha bits stay in place and the top four
// intensity bits are copied into the red, green and blue nibbles.
// The bit-pattern comments below describe one 16-bit lane.
// Naked function: it builds its own ebp frame and saves/restores ebx, esi, edi.
// The loop is bottom-tested, so isize == 0 is not handled.
extern "C" void __declspec(naked) asmTexConv_AI88_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int isize)
{
_asm {
align 4
push ebp
mov ebp, esp
push ebx
push esi
push edi
mov esi,[src]
mov edi,[dst]
mov ecx,[isize] // ecx = dwords (pixel pairs) remaining
tc1_loop:
mov eax,[esi]
add esi,4
// aaaa aaaa iiii iiii
// aaaa rrrr gggg bbbb
mov edx,eax
and eax,0xF000F000 // eax = 0xaaaa000000000000
mov ebx,eax // ebx = 0xaaaa000000000000
and edx,0x00F000F0 // edx = 0x00000000iiii0000
shl edx,4 // edx = 0x0000iiii00000000
or ebx,edx // ebx = 0xaaaaiiii00000000
shr edx,4 // edx = 0x00000000iiii0000
or ebx,edx // ebx = 0xaaaaiiiiiiii0000
shr edx,4 // edx = 0x000000000000iiii
or ebx,edx // ebx = 0xaaaaiiiiiiiiiiii
mov [edi],ebx
add edi,4
dec ecx
jnz tc1_loop
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret
}
}
// asmTexConv_AI44_ARGB4444 - converts 'isize' source dwords from 'src' to 'dst'.
// Each source dword holds four 8-bit pixels (4-bit alpha, 4-bit intensity) and
// produces two destination dwords: first pixels 1 and 0, then pixels 3 and 2.
// For every pixel the alpha nibble is kept and the intensity nibble is copied into
// the red, green and blue nibbles.
// Naked function: it builds its own ebp frame and saves/restores ebx, esi, edi.
// The loop is bottom-tested, so isize == 0 is not handled.
extern "C" void __declspec(naked) asmTexConv_AI44_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int isize)
{
_asm {
align 4
push ebp
mov ebp, esp
push ebx
push esi
push edi
mov esi,[src]
mov edi,[dst]
mov ecx,[isize] // ecx = source dwords remaining
tc1_loop:
mov eax,[esi]
add esi,4
// aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0
// aaaa1 rrrr1 gggg1 bbbb1 aaaa0 rrrr0 gggg0 bbbb0
// aaaa3 rrrr3 gggg3 bbbb3 aaaa2 rrrr2 gggg2 bbbb2
mov edx,eax // eax = aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0
shl eax,16 // eax = aaaa1 iiii1 aaaa0 iiii0 0000 0000 0000 0000
and eax,0xFF000000 // eax = aaaa1 iiii1 0000 0000 0000 0000 0000 0000
mov ebx,eax // ebx = aaaa1 iiii1 0000 0000 0000 0000 0000 0000
and eax,0x0F000000 // eax = 0000 iiii1 0000 0000 0000 0000 0000 0000
shr eax,4 // eax = 0000 0000 iiii1 0000 0000 0000 0000 0000
or ebx,eax // ebx = aaaa1 iiii1 iiii1 0000 0000 0000 0000 0000
shr eax,4 // eax = 0000 0000 0000 iiii1 0000 0000 0000 0000
or ebx,eax // ebx = aaaa1 iiii1 iiii1 iiii1 0000 0000 0000 0000
mov eax,edx // eax = aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0
shl eax,8 // eax = aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0 0000 0000
and eax,0x0000FF00 // eax = 0000 0000 0000 0000 aaaa0 iiii0 0000 0000
or ebx,eax // ebx = aaaa1 iiii1 iiii1 iiii1 aaaa0 iiii0 0000 0000
and eax,0x00000F00 // eax = 0000 0000 0000 0000 0000 iiii0 0000 0000
shr eax,4 // eax = 0000 0000 0000 0000 0000 0000 iiii0 0000
or ebx,eax // ebx = aaaa1 iiii1 iiii1 iiii1 aaaa0 iiii0 iiii0 0000
shr eax,4 // eax = 0000 0000 0000 0000 0000 0000 0000 iiii0
or ebx,eax // ebx = aaaa1 iiii1 iiii1 iiii1 aaaa0 iiii0 iiii0 iiii0
mov [edi],ebx
add edi,4
mov eax,edx // eax = aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0
and eax,0xFF000000 // eax = aaaa3 iiii3 0000 0000 0000 0000 0000 0000
mov ebx,eax // ebx = aaaa3 iiii3 0000 0000 0000 0000 0000 0000
and eax,0x0F000000 // eax = 0000 iiii3 0000 0000 0000 0000 0000 0000
shr eax,4 // eax = 0000 0000 iiii3 0000 0000 0000 0000 0000
or ebx,eax // ebx = aaaa3 iiii3 iiii3 0000 0000 0000 0000 0000
shr eax,4 // eax = 0000 0000 0000 iiii3 0000 0000 0000 0000
or ebx,eax // ebx = aaaa3 iiii3 iiii3 iiii3 0000 0000 0000 0000
// edx = aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0
shr edx,8 // edx = 0000 0000 aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1
and edx,0x0000FF00 // edx = 0000 0000 0000 0000 aaaa2 iiii2 0000 0000
or ebx,edx // ebx = aaaa3 iiii3 iiii3 iiii3 aaaa2 iiii2 0000 0000
and edx,0x00000F00 // edx = 0000 0000 0000 0000 0000 iiii2 0000 0000
shr edx,4 // edx = 0000 0000 0000 0000 0000 0000 iiii2 0000
or ebx,edx // ebx = aaaa3 iiii3 iiii3 iiii3 aaaa2 iiii2 iiii2 0000
shr edx,4 // edx = 0000 0000 0000 0000 0000 0000 0000 iiii2
or ebx,edx // ebx = aaaa3 iiii3 iiii3 iiii3 aaaa2 iiii2 iiii2 iiii2
mov [edi],ebx
add edi,4
dec ecx
jnz tc1_loop
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret
}
}
// asmTexConv_A8_ARGB4444 - converts 'isize' source dwords from 'src' to 'dst'.
// Each source dword holds four 8-bit pixels and produces two destination dwords:
// first pixels 1 and 0, then pixels 3 and 2. For every pixel the high nibble of the
// source byte is replicated into all four nibbles of the 16-bit result.
// In the comments below each "aN"/"00" stands for one nibble.
// Naked function: it builds its own ebp frame and saves/restores ebx, esi, edi.
// The loop is bottom-tested, so isize == 0 is not handled.
extern "C" void __declspec(naked) asmTexConv_A8_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int isize)
{
_asm {
align 4
push ebp
mov ebp, esp
push ebx
push esi
push edi
mov esi,[src]
mov edi,[dst]
mov ecx,[isize] // ecx = source dwords remaining
tc1_loop:
mov eax,[esi]
add esi,4
// aaaa3 aaaa3 aaaa2 aaaa2 aaaa1 aaaa1 aaaa0 aaaa0
// aaaa1 rrrr1 gggg1 bbbb1 aaaa0 rrrr0 gggg0 bbbb0
// aaaa3 rrrr3 gggg3 bbbb3 aaaa2 rrrr2 gggg2 bbbb2
mov edx,eax // edx keeps the unmodified source dword
and eax,0x0000F000 // eax = 00 00 00 00 a1 00 00 00
shl eax,16 // eax = a1 00 00 00 00 00 00 00
mov ebx,eax // ebx = a1 00 00 00 00 00 00 00
shr eax,4
or ebx,eax // ebx = a1 a1 00 00 00 00 00 00
shr eax,4
or ebx,eax // ebx = a1 a1 a1 00 00 00 00 00
shr eax,4
or ebx,eax // ebx = a1 a1 a1 a1 00 00 00 00
mov eax,edx
and eax,0x000000F0 // eax = 00 00 00 00 00 00 a0 00
shl eax,8 // eax = 00 00 00 00 a0 00 00 00
or ebx,eax
shr eax,4
or ebx,eax
shr eax,4
or ebx,eax
shr eax,4
or ebx,eax // ebx = a1 a1 a1 a1 a0 a0 a0 a0
mov [edi],ebx
add edi,4
mov eax,edx // eax = a3 a3 a2 a2 a1 a1 a0 a0
and eax,0xF0000000 // eax = a3 00 00 00 00 00 00 00
mov ebx,eax // ebx = a3 00 00 00 00 00 00 00
shr eax,4
or ebx,eax // ebx = a3 a3 00 00 00 00 00 00
shr eax,4
or ebx,eax // ebx = a3 a3 a3 00 00 00 00 00
shr eax,4
or ebx,eax // ebx = a3 a3 a3 a3 00 00 00 00
and edx,0x00F00000 // edx = 00 00 a2 00 00 00 00 00
shr edx,8 // edx = 00 00 00 00 a2 00 00 00
or ebx,edx
shr edx,4
or ebx,edx
shr edx,4
or ebx,edx
shr edx,4
or ebx,edx // ebx = a3 a3 a3 a3 a2 a2 a2 a2
mov [edi],ebx
add edi,4
dec ecx
jnz tc1_loop
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret
}
}
void TexConv_ARGB1555_ARGB4444 (wxUIntPtr src, wxUIntPtr dst, int width, int height)
{

View File

@ -37,9 +37,164 @@
//
//****************************************************************
extern "C" void asmLoad16bRGBA (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext);
extern "C" void asmLoad16bIA (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext);
// asmLoad16bRGBA - copies 'height' rows of 'wid_64' 8-byte units from 'src' to 'dst',
// converting each 16-bit pixel on the way.
// For every source dword: the bytes are reversed (bswap), each 16-bit half is rotated
// right by one bit, and the two halves are exchanged (ror eax,16 between the rotates).
// Rows alternate: the first, third, ... row stores the two dwords of a unit in source
// order; the second, fourth, ... row stores them exchanged.
// Between rows the source position becomes src + ((pos + line - src) & 0xFFF), i.e. it
// wraps inside a 4 KiB window that starts at 'src'; 'ext' bytes are added to the destination.
// Naked function: it builds its own ebp frame and saves/restores ebx, esi, edi.
// Loops are bottom-tested, so wid_64 == 0 or height == 0 is not handled.
extern "C" void __declspec(naked) asmLoad16bRGBA (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext)
{
_asm {
align 4
push ebp
mov ebp,esp
push ebx
push esi
push edi
mov esi,[src]
mov edi,[dst]
mov ecx,[height] // ecx = rows remaining
y_loop:
push ecx // keep the row counter while ecx counts units in the row
mov ecx,[wid_64]
x_loop:
mov eax,[esi] // read both pixels
mov ebx,[esi+4] // read both pixels
bswap eax
bswap ebx
ror ax,1 // rotate the low 16-bit pixel right by one bit
ror bx,1
ror eax,16 // exchange the two 16-bit halves
ror ebx,16
ror ax,1 // rotate the other pixel (now in the low half)
ror bx,1
mov [edi],eax
mov [edi+4],ebx
add esi,8
add edi,8
dec ecx
jnz x_loop
pop ecx
dec ecx
jz end_y_loop
push ecx
mov eax,esi // wrap the source position inside the 0xFFF window based at src
add eax,[line]
mov esi,[src]
sub eax, esi
and eax, 0xFFF
add esi, eax
add edi,[ext]
mov ecx,[wid_64]
x_loop_2:
mov eax,[esi+4] // read both pixels
mov ebx,[esi] // read both pixels
bswap eax
bswap ebx
ror ax,1
ror bx,1
ror eax,16
ror ebx,16
ror ax,1
ror bx,1
mov [edi],eax
mov [edi+4],ebx
add esi,8
add edi,8
dec ecx
jnz x_loop_2
mov eax,esi // wrap the source position inside the 0xFFF window based at src
add eax,[line]
mov esi,[src]
sub eax, esi
and eax, 0xFFF
add esi, eax
add edi,[ext]
pop ecx
dec ecx
jnz y_loop
end_y_loop:
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret
}
}
// asmLoad16bIA - copies 'height' rows of 'wid_64' 8-byte units from 'src' to 'dst'
// without modifying the pixel data.
// Rows alternate: the first, third, ... row stores the two dwords of a unit in source
// order; the second, fourth, ... row stores them exchanged.
// After each row 'line' bytes are added to the source pointer and 'ext' bytes to the
// destination pointer.
// Naked function: it builds its own ebp frame and saves/restores ebx, esi, edi.
// Loops are bottom-tested, so wid_64 == 0 or height == 0 is not handled.
extern "C" void __declspec(naked) asmLoad16bIA (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext)
{
_asm {
ALIGN 4
push ebp
mov ebp, esp
push ebx
push esi
push edi
mov esi,[src]
mov edi,[dst]
mov ecx,[height] // ecx = rows remaining
y_loop:
push ecx // keep the row counter while ecx counts units in the row
mov ecx,[wid_64]
x_loop:
mov eax,[esi] // read both pixels
mov ebx,[esi+4] // read both pixels
mov [edi],eax
mov [edi+4],ebx
add esi,8
add edi,8
dec ecx
jnz x_loop
pop ecx
dec ecx
jz end_y_loop
push ecx
add esi,[line]
add edi,[ext]
mov ecx,[wid_64]
x_loop_2:
mov eax,[esi+4] // read both pixels
mov ebx,[esi] // read both pixels
mov [edi],eax
mov [edi+4],ebx
add esi,8
add edi,8
dec ecx
jnz x_loop_2
add esi,[line]
add edi,[ext]
pop ecx
dec ecx
jnz y_loop
end_y_loop:
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret
}
}
//****************************************************************
// Size: 2, Format: 0

File diff suppressed because it is too large Load Diff

View File

@ -37,10 +37,630 @@
//
//****************************************************************
extern "C" void asmLoad8bCI (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext, wxUIntPtr pal);
extern "C" void asmLoad8bIA8 (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext, wxUIntPtr pal);
extern "C" void asmLoad8bIA4 (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext);
extern "C" void asmLoad8bI (wxUIntPtr src, int dst, wxUIntPtr wid_64, int height, int line, int ext);
// asmLoad8bCI - expands 8-bit palette indices from 'src' into 16-bit texels at 'dst'.
//   wid_64 - number of 8-byte source units per row (each unit yields 16 output bytes)
//   height - number of rows
//   pal    - base of a table of 16-bit entries, indexed by (index * 2)
// Each source dword is byte-reversed (bswap); every byte is used as a palette index,
// the 16-bit entry is rotated right by one bit, and two entries are packed per output dword.
// Rows alternate: the first, third, ... row reads the two dwords of a unit in order; the
// second, fourth, ... row reads [esi+4] before [esi].
// Between rows the source position becomes src + ((pos + line - src) & 0x7FF); inside the
// second-row loop the 8-byte advance is wrapped with the same 0x7FF mask. 'ext' bytes are
// added to the destination after each row.
// ecx doubles as scratch (cx holds the palette entry), so the loop counters live on the stack.
// Naked function: it builds its own ebp frame and saves/restores ebx, esi, edi.
// Loops are bottom-tested, so wid_64 == 0 or height == 0 is not handled.
extern "C" void __declspec(naked) asmLoad8bCI (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext, wxUIntPtr pal)
{
_asm {
push ebp
mov ebp, esp
push ebx
push esi
push edi
mov ebx,[pal] // ebx = palette base
mov esi,[src]
mov edi,[dst]
mov ecx,[height] // ecx = rows remaining
y_loop:
push ecx
mov ecx,[wid_64]
x_loop:
push ecx // save the unit counter; ecx is scratch below
mov eax,[esi] // read all 4 pixels
bswap eax
add esi,4
mov edx,eax
// 1st dword output {
shr eax,15
and eax,0x1FE // eax = (bits 16..23 of the swapped dword) * 2
mov cx,[ebx+eax]
ror cx,1
shl ecx,16 // move this entry to the high half of the output dword
mov eax,edx
shr eax,23
and eax,0x1FE // eax = (bits 24..31) * 2
mov cx,[ebx+eax]
ror cx,1
mov [edi],ecx
add edi,4
// }
// 2nd dword output {
mov eax,edx
shl eax,1
and eax,0x1FE // eax = (bits 0..7) * 2
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
shr edx,7
and edx,0x1FE // edx = (bits 8..15) * 2
mov cx,[ebx+edx]
ror cx,1
mov [edi],ecx
add edi,4
// }
// * copy
mov eax,[esi] // read all 4 pixels
bswap eax
add esi,4
mov edx,eax
// 1st dword output {
shr eax,15
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
mov eax,edx
shr eax,23
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
mov [edi],ecx
add edi,4
// }
// 2nd dword output {
mov eax,edx
shl eax,1
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
shr edx,7
and edx,0x1FE
mov cx,[ebx+edx]
ror cx,1
mov [edi],ecx
add edi,4
// }
// *
pop ecx
dec ecx
jnz x_loop
pop ecx
dec ecx
jz near end_y_loop
push ecx
mov eax,esi // wrap the source position inside the 0x7FF window based at src
add eax,[line]
mov esi,[src]
sub eax,esi
and eax,0x7FF
add esi,eax
add edi,[ext]
mov ecx,[wid_64]
x_loop_2:
push ecx
mov eax,[esi+4] // read all 4 pixels
bswap eax
mov edx,eax
// 1st dword output {
shr eax,15
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
mov eax,edx
shr eax,23
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
mov [edi],ecx
add edi,4
// }
// 2nd dword output {
mov eax,edx
shl eax,1
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
shr edx,7
and edx,0x1FE
mov cx,[ebx+edx]
ror cx,1
mov [edi],ecx
add edi,4
// }
// * copy
mov eax,[esi] // read all 4 pixels
bswap eax
mov edx,esi // advance the source by 8 bytes, wrapped inside the 0x7FF window
add edx,8
mov esi,[src]
sub edx,esi
and edx,0x7FF
add esi,edx
mov edx,eax
// 1st dword output {
shr eax,15
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
mov eax,edx
shr eax,23
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
mov [edi],ecx
add edi,4
// }
// 2nd dword output {
mov eax,edx
shl eax,1
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
shr edx,7
and edx,0x1FE
mov cx,[ebx+edx]
ror cx,1
mov [edi],ecx
add edi,4
// }
// *
pop ecx
dec ecx
jnz x_loop_2
mov eax,esi // wrap the source position inside the 0x7FF window based at src
add eax,[line]
mov esi,[src]
sub eax,esi
and eax,0x7FF
add esi,eax
add edi,[ext]
pop ecx
dec ecx
jnz y_loop
end_y_loop:
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret
}
}
// asmLoad8bIA8 - expands 8-bit palette indices from 'src' into 16-bit texels at 'dst'.
//   wid_64 - number of 8-byte source units per row (each unit yields 16 output bytes)
//   height - number of rows
//   pal    - base of a table of 16-bit entries, indexed by (index * 2)
// Each source dword is byte-reversed (bswap); every byte is used as a palette index,
// the two bytes of the 16-bit entry are exchanged (ror cx,8), and two entries are packed
// per output dword.
// Rows alternate: the first, third, ... row reads the two dwords of a unit in order; the
// second, fourth, ... row reads [esi+4] before [esi].
// After each row 'line' bytes are added to the source pointer and 'ext' bytes to the
// destination pointer (no address wrapping is applied here).
// ecx doubles as scratch (cx holds the palette entry), so the loop counters live on the stack.
// Naked function: it builds its own ebp frame and saves/restores ebx, esi, edi.
// Loops are bottom-tested, so wid_64 == 0 or height == 0 is not handled.
extern "C" void __declspec(naked) asmLoad8bIA8 (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext, wxUIntPtr pal)
{
_asm {
push ebp
mov ebp, esp
push ebx
push esi
push edi
mov ebx,[pal] // ebx = palette base
mov esi,[src]
mov edi,[dst]
mov ecx,[height] // ecx = rows remaining
y_loop:
push ecx
mov ecx,[wid_64]
x_loop:
push ecx // save the unit counter; ecx is scratch below
mov eax,[esi] // read all 4 pixels
bswap eax
add esi,4
mov edx,eax
// 1st dword output {
shr eax,15
and eax,0x1FE // eax = (bits 16..23 of the swapped dword) * 2
mov cx,[ebx+eax]
ror cx,8 // exchange the two bytes of the palette entry
shl ecx,16 // move this entry to the high half of the output dword
mov eax,edx
shr eax,23
and eax,0x1FE // eax = (bits 24..31) * 2
mov cx,[ebx+eax]
ror cx,8
mov [edi],ecx
add edi,4
// }
// 2nd dword output {
mov eax,edx
shl eax,1
and eax,0x1FE // eax = (bits 0..7) * 2
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
shr edx,7
and edx,0x1FE // edx = (bits 8..15) * 2
mov cx,[ebx+edx]
ror cx,8
mov [edi],ecx
add edi,4
// }
// * copy
mov eax,[esi] // read all 4 pixels
bswap eax
add esi,4
mov edx,eax
// 1st dword output {
shr eax,15
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
mov eax,edx
shr eax,23
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
mov [edi],ecx
add edi,4
// }
// 2nd dword output {
mov eax,edx
shl eax,1
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
shr edx,7
and edx,0x1FE
mov cx,[ebx+edx]
ror cx,8
mov [edi],ecx
add edi,4
// }
// *
pop ecx
dec ecx
jnz x_loop
pop ecx
dec ecx
jz near end_y_loop
push ecx
add esi,[line]
add edi,[ext]
mov ecx,[wid_64]
x_loop_2:
push ecx
mov eax,[esi+4] // read all 4 pixels
bswap eax
mov edx,eax
// 1st dword output {
shr eax,15
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
mov eax,edx
shr eax,23
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
mov [edi],ecx
add edi,4
// }
// 2nd dword output {
mov eax,edx
shl eax,1
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
shr edx,7
and edx,0x1FE
mov cx,[ebx+edx]
ror cx,8
mov [edi],ecx
add edi,4
// }
// * copy
mov eax,[esi] // read all 4 pixels
bswap eax
add esi,8 // both dwords of this unit have now been read
mov edx,eax
// 1st dword output {
shr eax,15
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
mov eax,edx
shr eax,23
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
mov [edi],ecx
add edi,4
// }
// 2nd dword output {
mov eax,edx
shl eax,1
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
shr edx,7
and edx,0x1FE
mov cx,[ebx+edx]
ror cx,8
mov [edi],ecx
add edi,4
// }
// *
pop ecx
dec ecx
jnz x_loop_2
add esi,[line]
add edi,[ext]
pop ecx
dec ecx
jnz y_loop
end_y_loop:
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret
}
}
// asmLoad8bIA4 - copies 'height' rows of 'wid_64' 8-byte units from 'src' to 'dst',
// exchanging the high and low nibble of every byte:
//   out = ((in >> 4) & 0x0F0F0F0F) | ((in << 4) & 0xF0F0F0F0)
// Rows alternate: the first, third, ... row processes the two dwords of a unit in order;
// the second, fourth, ... row processes [esi+4] before [esi].
// After each row 'line' bytes are added to the source pointer and 'ext' bytes to the
// destination pointer.
// Naked function: it builds its own ebp frame and saves/restores ebx, esi, edi.
// Loops are bottom-tested, so wid_64 == 0 or height == 0 is not handled.
extern "C" void __declspec(naked) asmLoad8bIA4 (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext)
{
_asm {
push ebp
mov ebp, esp
push ebx
push esi
push edi
mov esi,[src]
mov edi,[dst]
mov ecx,[height] // ecx = rows remaining
y_loop:
push ecx // keep the row counter while ecx counts units in the row
mov ecx,[wid_64]
x_loop:
mov eax,[esi] // read all 4 pixels
mov edx,eax
shr eax,4 //all alpha
shl edx,4
and eax,0x0F0F0F0F
and edx,0xF0F0F0F0
add esi,4
or eax,edx
mov [edi],eax // save dword
add edi,4
mov eax,[esi] // read all 4 pixels
mov edx,eax
shr eax,4 //all alpha
shl edx,4
and eax,0x0F0F0F0F
and edx,0xF0F0F0F0
add esi,4
or eax,edx
mov [edi],eax // save dword
add edi,4
// *
dec ecx
jnz x_loop
pop ecx
dec ecx
jz end_y_loop
push ecx
add esi,[line]
add edi,[ext]
mov ecx,[wid_64]
x_loop_2:
mov eax,[esi+4] // read all 4 pixels of the second dword first
mov edx,eax
shr eax,4 //all alpha
shl edx,4
and eax,0x0F0F0F0F
and edx,0xF0F0F0F0
or eax,edx
mov [edi],eax //save dword
add edi,4
mov eax,[esi] // read all 4 pixels of the first dword
add esi,8
mov edx,eax
shr eax,4 //all alpha
shl edx,4
and eax,0x0F0F0F0F
and edx,0xF0F0F0F0
or eax,edx
mov [edi],eax //save dword
add edi,4
// *
dec ecx
jnz x_loop_2
add esi,[line]
add edi,[ext]
pop ecx
dec ecx
jnz y_loop
end_y_loop:
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret
}
}
// asmLoad8bI - copies 'height' rows of 'wid_64' 8-byte units from 'src' to 'dst'
// without modifying the data.
// Rows alternate: the first, third, ... row copies the two dwords of a unit in order;
// the second, fourth, ... row stores [esi+4] before [esi].
// After each row 'line' bytes are added to the source pointer and 'ext' bytes to the
// destination pointer.
// NOTE(review): the parameter types of 'dst' (int) and 'wid_64' (wxUIntPtr) look swapped
// relative to the sibling loaders; harmless while both are 32 bits wide, but confirm.
// Naked function: it builds its own ebp frame and saves/restores ebx, esi, edi.
// Loops are bottom-tested, so wid_64 == 0 or height == 0 is not handled.
extern "C" void __declspec(naked) asmLoad8bI (wxUIntPtr src, int dst, wxUIntPtr wid_64, int height, int line, int ext)
{
_asm {
push ebp
mov ebp, esp
push ebx
push esi
push edi
mov esi,[src]
mov edi,[dst]
mov ecx,[height] // ecx = rows remaining
y_loop:
push ecx // keep the row counter while ecx counts units in the row
mov ecx,[wid_64]
x_loop:
mov eax,[esi] // read all 4 pixels
add esi,4
mov [edi],eax // save dword
add edi,4
mov eax,[esi] // read all 4 pixels
add esi,4
mov [edi],eax // save dword
add edi,4
// *
dec ecx
jnz x_loop
pop ecx
dec ecx
jz end_y_loop
push ecx
add esi,[line]
add edi,[ext]
mov ecx,[wid_64]
x_loop_2:
mov eax,[esi+4] // read all 4 pixels of the second dword first
mov [edi],eax //save dword
add edi,4
mov eax,[esi] // read all 4 pixels of the first dword
add esi,8
mov [edi],eax //save dword
add edi,4
// *
dec ecx
jnz x_loop_2
add esi,[line]
add edi,[ext]
pop ecx
dec ecx
jnz y_loop
end_y_loop:
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret
}
}
//****************************************************************
// Size: 1, Format: 2

File diff suppressed because it is too large Load Diff

View File

@ -48,8 +48,38 @@
#include "FBtoScreen.h"
#include "CRC.h"
extern "C" void SwapBlock32 ();
extern "C" void SwapBlock64 ();
// SwapBlock32 - register-argument helper; it takes no C parameters and is meant to be
// called from the assembly routines in this file.
// For each of ecx iterations it exchanges the dword at [edi] with the dword at [edi+4]
// and advances edi by 8. ecx == 0 is handled (nothing is touched).
// eax and ebx are saved and restored; on return ecx is 0 and edi points past the last
// exchanged pair.
extern "C" __declspec(naked) void SwapBlock32 ( void )
{
//****************************************************************
// SwapBlock - swaps every other 32-bit word at addr
//
// ecx = num_words -> 0
// edi = addr -> end of dest
//****************************************************************
_asm {
align 4
push ebp
mov ebp, esp
push eax
push ebx
or ecx,ecx // sets ZF when ecx == 0 without changing it
jz swapblock32_end
swapblock32_loop:
mov eax,[edi]
mov ebx,[edi+4]
mov [edi],ebx
mov [edi+4],eax
add edi,8
dec ecx
jnz swapblock32_loop
swapblock32_end:
pop ebx
pop eax
mov esp, ebp
pop ebp
ret
}
}
const int NumOfFormats = 3;
SCREEN_SHOT_FORMAT ScreenShotFormats[NumOfFormats] = { {wxT("BMP"), wxT("bmp"), wxBITMAP_TYPE_BMP}, {wxT("PNG"), wxT("png"), wxBITMAP_TYPE_PNG}, {wxT("JPEG"), wxT("jpeg"), wxBITMAP_TYPE_JPEG} };
@ -1824,7 +1854,168 @@ void setTBufTex(wxUint16 t_mem, wxUint32 cnt)
}
}
extern "C" void asmLoadBlock(int src, int dst, int off, int dxt, int cnt, int swp);
// CopyBlock - register-argument helper; it takes no C parameters and is called from
// asmLoadBlock and asmLoadTile.
//   In:  ecx = number of 8-byte words to copy (0 returns immediately)
//        esi = source base address, edx = byte offset from that base
//        edi = destination
//   Out: edi is advanced past the ecx*8 bytes written.
//        eax, ebx, esi and edx are restored.
//        ecx is restored when (edx & 3) == 0; on the unaligned path it is left shifted
//        left by 3, so callers must not rely on it.
// Every source dword is byte-reversed (bswap) before it is stored. When the offset is
// not a multiple of 4, source dwords are still read at 4-byte-aligned addresses: the
// leading 4-(edx&3) bytes and the trailing (edx&3) bytes are extracted one at a time,
// most significant byte first, with rol eax,8.
void __declspec(naked) CopyBlock ( void )
{
_asm {
align 4
push ebp
mov ebp, esp
push eax
push ebx
push esi
push edx
or ecx,ecx
jz near copyblock_end
push ecx
// first, set the source address and check if not on a dword boundary
push esi
push edx
mov ebx,edx
and edx,0FFFFFFFCh
add esi,edx
and ebx,3 // ebx = # we DON'T need to copy
jz copyblock_copy
mov edx,4 // edx = # we DO need to copy
sub edx,ebx
// load the first word, accounting for swapping
mov eax,[esi]
add esi,4
copyblock_precopy_skip:
rol eax,8 // discard one leading byte per iteration
dec ebx
jnz copyblock_precopy_skip
copyblock_precopy_copy:
rol eax,8 // bring the next byte (most significant first) into al
mov [edi],al
inc edi
dec edx
jnz copyblock_precopy_copy
mov eax,[esi] // second half of the first 8-byte word
add esi,4
bswap eax
mov [edi],eax
add edi,4
dec ecx // 1 less word to copy
jz copyblock_postcopy
copyblock_copy:
mov eax,[esi]
bswap eax
mov [edi],eax
mov eax,[esi+4]
bswap eax
mov [edi+4],eax
add esi,8
add edi,8
dec ecx
jnz copyblock_copy
copyblock_postcopy:
pop edx
pop esi
pop ecx
// check again if on dword boundary
mov ebx,edx // ebx = # we DO need to copy
and ebx,3
jz copyblock_end
shl ecx,3 // ecx = num_words * 8
add edx,ecx
and edx,0FFFFFFFCh
add esi,edx // esi = aligned address of the dword holding the trailing bytes
mov eax,[esi]
copyblock_postcopy_copy:
rol eax,8
mov [edi],al
inc edi
dec ebx
jnz copyblock_postcopy_copy
copyblock_end:
pop edx
pop esi
pop ebx
pop eax
mov esp, ebp
pop ebp
ret
}
}
// asmLoadBlock - copies 'cnt' 8-byte words from src+off to dst through CopyBlock, then
// walks the copied words and calls the routine whose address is in 'swp' on selected runs.
//   dxt - value added to a running accumulator once per word
//   swp - address of a register-argument routine, called with ecx = number of words in
//         the run and edi = address of the first word of the run
// Words are skipped while the accumulator's sign bit is clear and collected into a run
// while it is set. The swap routine is expected to leave ecx == 0 and to advance edi past
// the run (SwapBlock32 in this file behaves that way) - confirm for any other routine
// passed in. A final call is made for whatever run is pending when the words run out.
// Naked function: it builds its own ebp frame and saves/restores ebx, esi, edi.
// The word counter is decremented before it is tested, so cnt == 0 is not handled.
extern "C" __declspec(naked) void asmLoadBlock(int src, int dst, int off, int dxt, int cnt, wxUIntPtr swp)
{
_asm {
align 4
push ebp
mov ebp, esp
push ebx
push esi
push edi
// copy the data
mov esi,[src]
mov edi,[dst]
mov ecx,[cnt]
mov edx,[off]
call CopyBlock
// now swap it
mov eax,[cnt] // eax = count remaining
xor edx,edx // edx = dxt counter
mov edi,[dst]
mov ebx,[dxt]
xor ecx,ecx // ecx = how much to copy
dxt_test:
add edi,8 // step over a word that is not part of a run
dec eax
jz end_dxt_test
add edx,ebx
jns dxt_test // keep skipping while the accumulator's sign bit is clear
dxt_s_test:
inc ecx // one more word in the current run
dec eax
jz end_dxt_test
add edx,ebx
js dxt_s_test // keep collecting while the sign bit is set
// swap this data (ecx set, dst set)
call [swp] // (ecx reset to 0 after)
jmp dxt_test // and repeat
end_dxt_test:
// swap any remaining data
call [swp]
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret
}
}
void LoadBlock32b(wxUint32 tile, wxUint32 ul_s, wxUint32 ul_t, wxUint32 lr_s, wxUint32 dxt);
static void rdp_loadblock()
{
@ -1916,7 +2107,65 @@ static void rdp_loadblock()
setTBufTex(rdp.tiles[tile].t_mem, cnt);
}
extern "C" void asmLoadTile(int src, int dst, int width, int height, int line, int off, int end);
// asmLoadTile - copies 'height' lines of 'width' 8-byte words from src+off to dst using
// CopyBlock, and passes every second line (the 2nd, 4th, ...) to SwapBlock32 after copying.
//   line - bytes added to the source offset after each line
//   end  - upper bound for the destination: the loop stops as soon as the destination
//          pointer is above 'end' (unsigned compare)
// Register roles inside the loop: edi = destination, esi = source base, edx = source
// offset, ecx = words per line, eax = lines remaining, ebx = line parity toggle.
// Naked function: it builds its own ebp frame and saves/restores ebx, esi, edi.
// The line loop is bottom-tested, so height == 0 is not handled.
extern "C" __declspec(naked) void asmLoadTile(int src, int dst, int width, int height, int line, int off, int end)
{
_asm {
align 4
push ebp
mov ebp, esp
push ebx
push esi
push edi
// set initial values
mov edi,[dst]
mov ecx,[width]
mov esi,[src]
mov edx,[off]
xor ebx,ebx // swap this line?
mov eax,[height]
loadtile_loop:
cmp [end],edi // end of tmem: error
jc loadtile_end
// copy this line
push edi // remember where this line starts in case it has to be swapped
push ecx // CopyBlock may leave ecx modified
call CopyBlock
pop ecx
// swap it?
xor ebx,1
jnz loadtile_no_swap
// (ecx set, restore edi)
pop edi
push ecx // SwapBlock32 counts ecx down to 0
call SwapBlock32
pop ecx
jmp loadtile_swap_end
loadtile_no_swap:
// Drop the saved edi. This must adjust the full 32-bit esp: a 16-bit "add sp,4"
// wraps without carrying into the upper half when sp == 0xFFFC and would leave
// the stack pointer 64 KiB too low.
add esp,4 // forget edi, we are already at the next position
loadtile_swap_end:
add edx,[line]
dec eax
jnz loadtile_loop
loadtile_end:
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret
}
}
void LoadTile32b (wxUint32 tile, wxUint32 ul_s, wxUint32 ul_t, wxUint32 width, wxUint32 height);
static void rdp_loadtile()
{