[Glide64] Move the code out of texture.asm.cpp

This commit is contained in:
zilmar 2015-10-13 15:54:52 +11:00
parent d7a19c265f
commit 48554d2ad0
10 changed files with 3538 additions and 3886 deletions

View File

@ -270,10 +270,6 @@
RelativePath="TexModCI.h" RelativePath="TexModCI.h"
> >
</File> </File>
<File
RelativePath=".\Texture.asm.cpp"
>
</File>
</Filter> </Filter>
<Filter <Filter
Name="Config" Name="Config"

View File

@ -41,9 +41,156 @@
// //
//**************************************************************** //****************************************************************
// asmMirror32bS: extends rows of 32-bit texels horizontally by mirroring.
// Naked function: the stack frame is built and torn down by hand below and the
// arguments are read by name from inside the asm block.
//   tex    - address of the current source row (advanced by `full` after each row)
//   start  - address of the first destination texel
//   width  - bit(s) tested against (width + x) to pick forward or mirrored reads
//            (presumably a single power-of-two bit - confirm against the caller)
//   height - number of rows to process
//   mask   - AND mask applied to the byte offset x*4; also the mirror base offset
//   line   - bytes added to the destination pointer after each row
//   full   - bytes added to the source row address after each row
//   count  - texels written per row
// NOTE(review): both loops test their counter only after the first pass, so
// count and height are assumed to be non-zero - confirm against the caller.
extern "C" void asmMirror32bS (int tex, int start, int width, int height, int mask, int line, int full, int count); extern "C" void __declspec(naked) asmMirror32bS (int tex, int start, int width, int height, int mask, int line, int full, int count)
extern "C" void asmWrap32bS (int tex, int start, int height, int mask, int line, int full, int count); {
extern "C" void asmClamp32bS (int tex, int constant, int height,int line, int full, int count); _asm {
align 4
// manual prologue; ebx/esi/edi are saved because they are used below
push ebp
mov ebp, esp
push ebx
push esi
push edi
mov edi,[start] // edi = destination pointer
mov ecx,[height] // ecx = rows remaining
loop_y:
xor edx,edx // edx = x, texel index within the row
loop_x:
mov esi,[tex] // esi = start of the current source row
// (width + x) & width decides whether this texel is read mirrored
mov ebx,[width]
add ebx,edx
and ebx,[width]
jnz is_mirrored
// forward read: source = tex + ((x*4) & mask)
mov eax,edx
shl eax,2
and eax,[mask]
add esi,eax
mov eax,[esi]
mov [edi],eax
add edi,4
jmp end_mirror_check
is_mirrored:
// mirrored read: source = tex + mask - ((x*4) & mask)
add esi,[mask]
mov eax,edx
shl eax,2
and eax,[mask]
sub esi,eax
mov eax,[esi]
mov [edi],eax
add edi,4
end_mirror_check:
inc edx
cmp edx,[count]
jne loop_x
add edi,[line] // move the destination to the next row
// advance the source row; the updated address is written back into the tex argument slot
mov eax,[tex]
add eax,[full]
mov [tex],eax
dec ecx
jnz loop_y
// manual epilogue
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret
}
}
// asmWrap32bS: extends rows of 32-bit texels horizontally by wrapping.
// For every row, destination texel x (0 <= x < count) is copied from source
// texel (x & mask) of the current source row.
//   tex    - address of the current source row (advanced by `full` after each row)
//   start  - address of the first destination texel
//   height - number of rows to process
//   mask   - AND mask applied to the texel index x
//   line   - bytes added to the destination pointer after each row
//   full   - bytes added to the source row address after each row
//   count  - texels written per row
// NOTE(review): count and height are assumed to be non-zero (the counters are
// only tested after the first pass) - confirm against the caller.
extern "C" void __declspec(naked) asmWrap32bS (int tex, int start, int height, int mask, int line, int full, int count)
{
_asm {
align 4
// manual prologue (naked function)
push ebp
mov ebp, esp
push ebx
push esi
push edi
mov edi,[start] // edi = destination pointer
mov ecx,[height] // ecx = rows remaining
loop_y:
xor edx,edx // edx = x, texel index within the row
loop_x:
mov esi,[tex] // esi = start of the current source row
// source = tex + ((x & mask) * 4)
mov eax,edx
and eax,[mask]
shl eax,2
add esi,eax
mov eax,[esi]
mov [edi],eax
add edi,4
inc edx
cmp edx,[count]
jne loop_x
add edi,[line] // move the destination to the next row
// advance the source row; the result is stored back into the tex argument slot
mov eax,[tex]
add eax,[full]
mov [tex],eax
dec ecx
jnz loop_y
// manual epilogue
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret
}
}
// asmClamp32bS: extends rows of 32-bit texels horizontally by clamping.
// For every row, the dword found at the current `constant` address is stored
// `count` times starting at the current destination address.
//   tex      - address of the first destination texel (this routine only writes through it)
//   constant - address of the texel to replicate (advanced by `full` after each row)
//   height   - number of rows to process
//   line     - bytes added to the destination pointer after each row
//   full     - bytes added to the `constant` address after each row
//   count    - texels written per row
// NOTE(review): count and height are assumed to be non-zero (the counters are
// only tested after the first pass) - confirm against the caller.
extern "C" void __declspec(naked) asmClamp32bS (int tex, int constant, int height,int line, int full, int count)
{
_asm {
align 4
// manual prologue (naked function)
push ebp
mov ebp, esp
push ebx
push esi
push edi
mov esi,[constant] // esi = address of the texel to replicate
mov edi,[tex] // edi = destination pointer
mov ecx,[height] // ecx = rows remaining
y_loop:
mov eax,[esi] // eax = value replicated across this row
mov edx,[count] // edx = texels left to write in this row
x_loop:
mov [edi],eax // don't unroll or make dword, it may go into next line (doesn't have to be multiple of two)
add edi,4
dec edx
jnz x_loop
add esi,[full] // next row's source texel
add edi,[line] // next row's destination start
dec ecx
jnz y_loop
// manual epilogue
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret
}
}
//**************************************************************** //****************************************************************
// 32-bit Horizontal Mirror // 32-bit Horizontal Mirror

View File

@ -40,9 +40,155 @@
//**************************************************************** //****************************************************************
// 8-bit Horizontal Mirror // 8-bit Horizontal Mirror
// asmMirror8bS: extends rows of 8-bit texels horizontally by mirroring.
// Naked function: the stack frame is built and torn down by hand below.
//   tex    - address of the current source row (advanced by `full` after each row)
//   start  - address of the first destination byte
//   width  - bit(s) tested against (width + x) to pick forward or mirrored reads
//            (presumably a single power-of-two bit - confirm against the caller)
//   height - number of rows to process
//   mask   - AND mask applied to the index x; also the mirror base offset
//   line   - bytes added to the destination pointer after each row
//   full   - bytes added to the source row address after each row
//   count  - bytes written per row
// NOTE(review): count and height are assumed to be non-zero (the counters are
// only tested after the first pass) - confirm against the caller.
extern "C" void asmMirror8bS (int tex, int start, int width, int height, int mask, int line, int full, int count); extern "C" void __declspec(naked) asmMirror8bS (int tex, int start, int width, int height, int mask, int line, int full, int count)
extern "C" void asmWrap8bS (int tex, int start, int height, int mask, int line, int full, int count); {
extern "C" void asmClamp8bS (int tex, int constant, int height,int line, int full, int count); _asm{
ALIGN 4
// manual prologue; ebx/esi/edi are saved because they are used below
push ebp
mov ebp, esp
push ebx
push esi
push edi
mov edi,[start] // edi = destination pointer
mov ecx,[height] // ecx = rows remaining
loop_y:
xor edx,edx // edx = x, byte index within the row
loop_x:
mov esi,[tex] // esi = start of the current source row
// (width + x) & width decides whether this texel is read mirrored
mov ebx,[width]
add ebx,edx
and ebx,[width]
jnz is_mirrored
// forward read: source = tex + (x & mask)
mov eax,edx
and eax,[mask]
add esi,eax
mov al,[esi]
mov [edi],al
inc edi
jmp end_mirror_check
is_mirrored:
// mirrored read: source = tex + mask - (x & mask)
add esi,[mask]
mov eax,edx
and eax,[mask]
sub esi,eax
mov al,[esi]
mov [edi],al
inc edi
end_mirror_check:
inc edx
cmp edx,[count]
jne loop_x
add edi,[line] // move the destination to the next row
// advance the source row; the result is stored back into the tex argument slot
mov eax,[tex]
add eax,[full]
mov [tex],eax
dec ecx
jnz loop_y
// manual epilogue
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret
}
}
// asmWrap8bS: extends rows of 8-bit texels horizontally by wrapping.
// The copy is done one dword (four bytes) at a time: unit x (0 <= x < count)
// of the destination row is loaded from source offset (x & mask) * 4.
//   tex    - address of the current source row (advanced by `full` after each row)
//   start  - address of the first destination byte
//   height - number of rows to process
//   mask   - AND mask applied to the dword index x
//   line   - bytes added to the destination pointer after each row
//   full   - bytes added to the source row address after each row
//   count  - dwords written per row
// NOTE(review): unlike the other 8-bit helpers this works in dword units, so
// count and mask are presumably pre-divided by four by the caller - confirm.
// NOTE(review): count and height are assumed to be non-zero (the counters are
// only tested after the first pass) - confirm against the caller.
extern "C" void __declspec(naked) asmWrap8bS (int tex, int start, int height, int mask, int line, int full, int count)
{
_asm {
align 4
// manual prologue (naked function)
push ebp
mov ebp, esp
push ebx
push esi
push edi
mov edi,[start] // edi = destination pointer
mov ecx,[height] // ecx = rows remaining
loop_y:
xor edx,edx // edx = x, dword index within the row
loop_x:
mov esi,[tex] // esi = start of the current source row
// source = tex + ((x & mask) * 4)
mov eax,edx
and eax,[mask]
shl eax,2
add esi,eax
mov eax,[esi] // four 8-bit texels at once
mov [edi],eax
add edi,4
inc edx
cmp edx,[count]
jne loop_x
add edi,[line] // move the destination to the next row
// advance the source row; the result is stored back into the tex argument slot
mov eax,[tex]
add eax,[full]
mov [tex],eax
dec ecx
jnz loop_y
// manual epilogue
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret
}
}
// asmClamp8bS: extends rows of 8-bit texels horizontally by clamping.
// For every row, the byte found at the current `constant` address is stored
// `count` times starting at the current destination address.
//   tex      - address of the first destination byte (this routine only writes through it)
//   constant - address of the byte to replicate (advanced by `full` after each row)
//   height   - number of rows to process
//   line     - bytes added to the destination pointer after each row
//   full     - bytes added to the `constant` address after each row
//   count    - bytes written per row
// NOTE(review): count and height are assumed to be non-zero (the counters are
// only tested after the first pass) - confirm against the caller.
extern "C" void __declspec(naked) asmClamp8bS (int tex, int constant, int height,int line, int full, int count)
{
_asm {
align 4
// manual prologue (naked function)
push ebp
mov ebp, esp
push ebx
push esi
push edi
mov esi,[constant] // esi = address of the byte to replicate
mov edi,[tex] // edi = destination pointer
mov ecx,[height] // ecx = rows remaining
y_loop:
mov al,[esi] // al = value replicated across this row
mov edx,[count] // edx = bytes left to write in this row
x_loop:
mov [edi],al // don't unroll or make dword, it may go into next line (doesn't have to be multiple of two)
inc edi
dec edx
jnz x_loop
add esi,[full] // next row's source byte
add edi,[line] // next row's destination start
dec ecx
jnz y_loop
// manual epilogue
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret
}
}
void Mirror8bS (wxUint32 tex, wxUint32 mask, wxUint32 max_width, wxUint32 real_width, wxUint32 height) void Mirror8bS (wxUint32 tex, wxUint32 mask, wxUint32 max_width, wxUint32 real_width, wxUint32 height)
{ {

View File

@ -150,7 +150,55 @@ void ClearCache ()
//**************************************************************** //****************************************************************
// GetTexInfo - gets information for either t0 or t1, checks if in cache & fills tex_found // GetTexInfo - gets information for either t0 or t1, checks if in cache & fills tex_found
// asmTextureCRC: computes a 32-bit checksum over a rectangular block of memory.
//   addr   - address of the first byte to hash
//   width  - number of 8-byte groups hashed per row
//   height - number of rows
//   line   - bytes added to the read pointer after each row
// Returns the checksum in eax.
// Per 8-byte group: crc += dword0 + dword1, then crc = low32(crc * n) + high32(crc * n),
// where n is the number of groups still to do in the row. The same multiply/fold
// step is applied once per row using the remaining row count.
// NOTE(review): width and height are assumed to be non-zero (the counters are
// only tested after the first pass) - confirm against the caller.
extern "C" int asmTextureCRC(int addr, int width, int height, int line); extern "C" __declspec(naked) int asmTextureCRC(int addr, int width, int height, int line)
{
_asm {
align 4
// manual prologue (naked function); esi is not used, so only ebx/edi are saved
push ebp
mov ebp, esp
push ebx
push edi
xor eax,eax // eax is final result
mov ebx,[line] // ebx = bytes skipped after each row
mov ecx,[height] // ecx is height counter
mov edi,[addr] // edi is ptr to texture memory
crc_loop_y:
push ecx // keep the row counter while ecx counts groups
mov ecx,[width]
crc_loop_x:
add eax,[edi] // MUST be 64-bit aligned, so manually unroll
add eax,[edi+4]
// mul writes edx:eax = eax * edx; the high half is folded back into eax
mov edx,ecx
mul edx
add eax,edx
add edi,8
dec ecx
jnz crc_loop_x
pop ecx // ecx = rows remaining again
// same multiply/fold step, keyed on the row counter
mov edx,ecx
mul edx
add eax,edx
add edi,ebx
dec ecx
jnz crc_loop_y
// manual epilogue; eax carries the return value
pop edi
pop ebx
mov esp, ebp
pop ebp
ret
}
}
void GetTexInfo (int id, int tile) void GetTexInfo (int id, int tile)
{ {
FRDP (" | |-+ GetTexInfo (id: %d, tile: %d)\n", id, tile); FRDP (" | |-+ GetTexInfo (id: %d, tile: %d)\n", id, tile);

View File

@ -37,10 +37,270 @@
// //
//**************************************************************** //****************************************************************
// asmTexConv_ARGB1555_ARGB4444: converts 16-bit ARGB1555 pixels to ARGB4444.
//   src   - address of the source pixels
//   dst   - address of the destination pixels
//   isize - number of dwords to convert; each dword holds two 16-bit pixels,
//           and the masks below treat both halves at once
// The single alpha bit is replicated into all four alpha bits; each colour
// channel keeps its four most significant bits.
// The "0x..." patterns in the trailing comments are the 16 bits of one pixel
// written out bit by bit, not hexadecimal values.
// NOTE(review): isize is assumed to be non-zero (the counter is only tested
// after the first pass) - confirm against the caller.
extern "C" void asmTexConv_ARGB1555_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int size); extern "C" void __declspec(naked) asmTexConv_ARGB1555_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int isize)
extern "C" void asmTexConv_AI88_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int size); {
extern "C" void asmTexConv_AI44_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int size); _asm {
extern "C" void asmTexConv_A8_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int size); align 4
// manual prologue (naked function)
push ebp
mov ebp, esp
push ebx
push esi
push edi
mov esi,[src] // esi = read pointer
mov edi,[dst] // edi = write pointer
mov ecx,[isize] // ecx = dwords remaining
tc1_loop:
mov eax,[esi]
add esi,4
// arrr rrgg gggb bbbb
// aaaa rrrr gggg bbbb
mov edx,eax // edx keeps the untouched source pixels
and eax,0x80008000
mov ebx,eax // ebx = 0xa000000000000000
shr eax,1
or ebx,eax // ebx = 0xaa00000000000000
shr eax,1
or ebx,eax // ebx = 0xaaa0000000000000
shr eax,1
or ebx,eax // ebx = 0xaaaa000000000000
mov eax,edx
and eax,0x78007800 // eax = 0x0rrrr00000000000
shr eax,3 // eax = 0x0000rrrr00000000
or ebx,eax // ebx = 0xaaaarrrr00000000
mov eax,edx
and eax,0x03c003c0 // eax = 0x000000gggg000000
shr eax,2 // eax = 0x00000000gggg0000
or ebx,eax // ebx = 0xaaaarrrrgggg0000
and edx,0x001e001e // edx = 0x00000000000bbbb0
shr edx,1 // edx = 0x000000000000bbbb
or ebx,edx // ebx = 0xaaaarrrrggggbbbb
mov [edi],ebx
add edi,4
dec ecx
jnz tc1_loop
// manual epilogue
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret
}
}
// asmTexConv_AI88_ARGB4444: converts 16-bit alpha/intensity (8+8) pixels to ARGB4444.
//   src   - address of the source pixels
//   dst   - address of the destination pixels
//   isize - number of dwords to convert; each dword holds two 16-bit pixels,
//           and the masks below treat both halves at once
// The top four alpha bits become the alpha nibble; the top four intensity bits
// are copied into the red, green and blue nibbles.
// The "0x..." patterns in the trailing comments are the 16 bits of one pixel
// written out bit by bit, not hexadecimal values.
// NOTE(review): isize is assumed to be non-zero (the counter is only tested
// after the first pass) - confirm against the caller.
extern "C" void __declspec(naked) asmTexConv_AI88_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int isize)
{
_asm {
align 4
// manual prologue (naked function)
push ebp
mov ebp, esp
push ebx
push esi
push edi
mov esi,[src] // esi = read pointer
mov edi,[dst] // edi = write pointer
mov ecx,[isize] // ecx = dwords remaining
tc1_loop:
mov eax,[esi]
add esi,4
// aaaa aaaa iiii iiii
// aaaa rrrr gggg bbbb
mov edx,eax
and eax,0xF000F000 // eax = 0xaaaa000000000000
mov ebx,eax // ebx = 0xaaaa000000000000
and edx,0x00F000F0 // edx = 0x00000000iiii0000
shl edx,4 // edx = 0x0000iiii00000000
or ebx,edx // ebx = 0xaaaaiiii00000000
shr edx,4 // edx = 0x00000000iiii0000
or ebx,edx // ebx = 0xaaaaiiiiiiii0000
shr edx,4 // edx = 0x000000000000iiii
or ebx,edx // ebx = 0xaaaaiiiiiiiiiiii
mov [edi],ebx
add edi,4
dec ecx
jnz tc1_loop
// manual epilogue
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret
}
}
// asmTexConv_AI44_ARGB4444: converts 8-bit alpha/intensity (4+4) pixels to ARGB4444.
//   src   - address of the source pixels
//   dst   - address of the destination pixels
//   isize - number of source dwords; each source dword holds four 8-bit pixels
//           and produces two destination dwords (four 16-bit pixels)
// The alpha nibble is kept as is; the intensity nibble is copied into the red,
// green and blue nibbles. Pixels 0 and 1 go into the first output dword,
// pixels 2 and 3 into the second.
// NOTE(review): isize is assumed to be non-zero (the counter is only tested
// after the first pass) - confirm against the caller.
extern "C" void __declspec(naked) asmTexConv_AI44_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int isize)
{
_asm {
align 4
// manual prologue (naked function)
push ebp
mov ebp, esp
push ebx
push esi
push edi
mov esi,[src] // esi = read pointer
mov edi,[dst] // edi = write pointer
mov ecx,[isize] // ecx = source dwords remaining
tc1_loop:
mov eax,[esi]
add esi,4
// aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0
// aaaa1 rrrr1 gggg1 bbbb1 aaaa0 rrrr0 gggg0 bbbb0
// aaaa3 rrrr3 gggg3 bbbb3 aaaa2 rrrr2 gggg2 bbbb2
// --- first output dword: pixels 1 (high half) and 0 (low half) ---
mov edx,eax // eax = aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0
shl eax,16 // eax = aaaa1 iiii1 aaaa0 iiii0 0000 0000 0000 0000
and eax,0xFF000000 // eax = aaaa1 iiii1 0000 0000 0000 0000 0000 0000
mov ebx,eax // ebx = aaaa1 iiii1 0000 0000 0000 0000 0000 0000
and eax,0x0F000000 // eax = 0000 iiii1 0000 0000 0000 0000 0000 0000
shr eax,4 // eax = 0000 0000 iiii1 0000 0000 0000 0000 0000
or ebx,eax // ebx = aaaa1 iiii1 iiii1 0000 0000 0000 0000 0000
shr eax,4 // eax = 0000 0000 0000 iiii1 0000 0000 0000 0000
or ebx,eax // ebx = aaaa1 iiii1 iiii1 iiii1 0000 0000 0000 0000
mov eax,edx // eax = aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0
shl eax,8 // eax = aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0 0000 0000
and eax,0x0000FF00 // eax = 0000 0000 0000 0000 aaaa0 iiii0 0000 0000
or ebx,eax // ebx = aaaa1 iiii1 iiii1 iiii1 aaaa0 iiii0 0000 0000
and eax,0x00000F00 // eax = 0000 0000 0000 0000 0000 iiii0 0000 0000
shr eax,4 // eax = 0000 0000 0000 0000 0000 0000 iiii0 0000
or ebx,eax // ebx = aaaa1 iiii1 iiii1 iiii1 aaaa0 iiii0 iiii0 0000
shr eax,4 // eax = 0000 0000 0000 0000 0000 0000 0000 iiii0
or ebx,eax // ebx = aaaa1 iiii1 iiii1 iiii1 aaaa0 iiii0 iiii0 iiii0
mov [edi],ebx
add edi,4
// --- second output dword: pixels 3 (high half) and 2 (low half) ---
mov eax,edx // eax = aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0
and eax,0xFF000000 // eax = aaaa3 iiii3 0000 0000 0000 0000 0000 0000
mov ebx,eax // ebx = aaaa3 iiii3 0000 0000 0000 0000 0000 0000
and eax,0x0F000000 // eax = 0000 iiii3 0000 0000 0000 0000 0000 0000
shr eax,4 // eax = 0000 0000 iiii3 0000 0000 0000 0000 0000
or ebx,eax // ebx = aaaa3 iiii3 iiii3 0000 0000 0000 0000 0000
shr eax,4 // eax = 0000 0000 0000 iiii3 0000 0000 0000 0000
or ebx,eax // ebx = aaaa3 iiii3 iiii3 iiii3 0000 0000 0000 0000
// edx = aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0
shr edx,8 // edx = 0000 0000 aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1
and edx,0x0000FF00 // edx = 0000 0000 0000 0000 aaaa2 iiii2 0000 0000
or ebx,edx // ebx = aaaa3 iiii3 iiii3 iiii3 aaaa2 iiii2 0000 0000
and edx,0x00000F00 // edx = 0000 0000 0000 0000 0000 iiii2 0000 0000
shr edx,4 // edx = 0000 0000 0000 0000 0000 0000 iiii2 0000
or ebx,edx // ebx = aaaa3 iiii3 iiii3 iiii3 aaaa2 iiii2 iiii2 0000
shr edx,4 // edx = 0000 0000 0000 0000 0000 0000 0000 iiii2
or ebx,edx // ebx = aaaa3 iiii3 iiii3 iiii3 aaaa2 iiii2 iiii2 iiii2
mov [edi],ebx
add edi,4
dec ecx
jnz tc1_loop
// manual epilogue
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret
}
}
// asmTexConv_A8_ARGB4444: converts 8-bit alpha-only pixels to ARGB4444.
//   src   - address of the source pixels
//   dst   - address of the destination pixels
//   isize - number of source dwords; each source dword holds four 8-bit pixels
//           and produces two destination dwords (four 16-bit pixels)
// The top nibble of every source byte is replicated into all four nibbles of
// the matching 16-bit output pixel. Pixels 0 and 1 go into the first output
// dword, pixels 2 and 3 into the second.
// In the trailing comments each two-character group stands for one nibble.
// NOTE(review): isize is assumed to be non-zero (the counter is only tested
// after the first pass) - confirm against the caller.
extern "C" void __declspec(naked) asmTexConv_A8_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int isize)
{
_asm {
align 4
// manual prologue (naked function)
push ebp
mov ebp, esp
push ebx
push esi
push edi
mov esi,[src] // esi = read pointer
mov edi,[dst] // edi = write pointer
mov ecx,[isize] // ecx = source dwords remaining
tc1_loop:
mov eax,[esi]
add esi,4
// aaaa3 aaaa3 aaaa2 aaaa2 aaaa1 aaaa1 aaaa0 aaaa0
// aaaa1 rrrr1 gggg1 bbbb1 aaaa0 rrrr0 gggg0 bbbb0
// aaaa3 rrrr3 gggg3 bbbb3 aaaa2 rrrr2 gggg2 bbbb2
// --- first output dword: pixels 1 (high half) and 0 (low half) ---
mov edx,eax // edx keeps the untouched source pixels
and eax,0x0000F000 // eax = 00 00 00 00 a1 00 00 00
shl eax,16 // eax = a1 00 00 00 00 00 00 00
mov ebx,eax // ebx = a1 00 00 00 00 00 00 00
shr eax,4
or ebx,eax // ebx = a1 a1 00 00 00 00 00 00
shr eax,4
or ebx,eax // ebx = a1 a1 a1 00 00 00 00 00
shr eax,4
or ebx,eax // ebx = a1 a1 a1 a1 00 00 00 00
mov eax,edx
and eax,0x000000F0 // eax = 00 00 00 00 00 00 a0 00
shl eax,8 // eax = 00 00 00 00 a0 00 00 00
or ebx,eax
shr eax,4
or ebx,eax
shr eax,4
or ebx,eax
shr eax,4
or ebx,eax // ebx = a1 a1 a1 a1 a0 a0 a0 a0
mov [edi],ebx
add edi,4
// --- second output dword: pixels 3 (high half) and 2 (low half) ---
mov eax,edx // eax = a3 a3 a2 a2 a1 a1 a0 a0
and eax,0xF0000000 // eax = a3 00 00 00 00 00 00 00
mov ebx,eax // ebx = a3 00 00 00 00 00 00 00
shr eax,4
or ebx,eax // ebx = a3 a3 00 00 00 00 00 00
shr eax,4
or ebx,eax // ebx = a3 a3 a3 00 00 00 00 00
shr eax,4
or ebx,eax // ebx = a3 a3 a3 a3 00 00 00 00
and edx,0x00F00000 // edx = 00 00 a2 00 00 00 00 00
shr edx,8 // edx = 00 00 00 00 a2 00 00 00
or ebx,edx
shr edx,4
or ebx,edx
shr edx,4
or ebx,edx
shr edx,4
or ebx,edx // ebx = a3 a3 a3 a3 a2 a2 a2 a2
mov [edi],ebx
add edi,4
dec ecx
jnz tc1_loop
// manual epilogue
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret
}
}
void TexConv_ARGB1555_ARGB4444 (wxUIntPtr src, wxUIntPtr dst, int width, int height) void TexConv_ARGB1555_ARGB4444 (wxUIntPtr src, wxUIntPtr dst, int width, int height)
{ {

View File

@ -37,9 +37,164 @@
// //
//**************************************************************** //****************************************************************
// asmLoad16bRGBA: copies rows of 16-bit pixels from src to dst, converting as it goes.
//   src    - address of the source data
//   dst    - address of the destination data
//   wid_64 - number of 8-byte groups (four pixels) per row
//   height - number of rows
//   line   - bytes added to the source position after each row
//   ext    - bytes added to the destination pointer after each row
// Every source dword is byte-swapped, then its two 16-bit halves are exchanged
// and each half is rotated right by one bit (presumably RGBA5551 -> ARGB1555 - confirm).
// Rows are handled in pairs: in the second row of each pair the two dwords of
// every 8-byte group are read in swapped order.
// After each row the source position is recomputed as
// src + ((position + line - src) & 0xFFF), i.e. it wraps inside a 4 KB window that starts at src.
// NOTE(review): wid_64 and height are assumed to be non-zero - confirm against the caller.
extern "C" void asmLoad16bRGBA (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext); extern "C" void __declspec(naked) asmLoad16bRGBA (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext)
extern "C" void asmLoad16bIA (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext); {
_asm {
align 4
// manual prologue (naked function)
push ebp
mov ebp,esp
push ebx
push esi
push edi
mov esi,[src] // esi = read pointer
mov edi,[dst] // edi = write pointer
mov ecx,[height] // ecx = rows remaining
y_loop:
push ecx // keep the row counter while ecx counts groups
mov ecx,[wid_64]
// first row of the pair: dwords are read in memory order
x_loop:
mov eax,[esi] // read both pixels
mov ebx,[esi+4] // read both pixels
bswap eax
bswap ebx
// rotate the low half, swap halves, rotate the other half
ror ax,1
ror bx,1
ror eax,16
ror ebx,16
ror ax,1
ror bx,1
mov [edi],eax
mov [edi+4],ebx
add esi,8
add edi,8
dec ecx
jnz x_loop
pop ecx
dec ecx
jz end_y_loop // the first row of the pair was the last row
push ecx
// esi = src + ((esi + line - src) & 0xFFF)
mov eax,esi
add eax,[line]
mov esi,[src]
sub eax, esi
and eax, 0xFFF
add esi, eax
add edi,[ext]
mov ecx,[wid_64]
// second row of the pair: the two dwords of each group are read swapped
x_loop_2:
mov eax,[esi+4] // read both pixels
mov ebx,[esi] // read both pixels
bswap eax
bswap ebx
ror ax,1
ror bx,1
ror eax,16
ror ebx,16
ror ax,1
ror bx,1
mov [edi],eax
mov [edi+4],ebx
add esi,8
add edi,8
dec ecx
jnz x_loop_2
// esi = src + ((esi + line - src) & 0xFFF)
mov eax,esi
add eax,[line]
mov esi,[src]
sub eax, esi
and eax, 0xFFF
add esi, eax
add edi,[ext]
pop ecx
dec ecx
jnz y_loop
end_y_loop:
// manual epilogue
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret
}
}
// asmLoad16bIA: copies rows of 16-bit pixels from src to dst without changing the pixel data.
//   src    - address of the source data
//   dst    - address of the destination data
//   wid_64 - number of 8-byte groups per row
//   height - number of rows
//   line   - bytes added to the source pointer after each row
//   ext    - bytes added to the destination pointer after each row
// Rows are handled in pairs: the first row is copied as is; in the second row
// the two dwords of every 8-byte group are written in swapped order.
// NOTE(review): wid_64 and height are assumed to be non-zero - confirm against the caller.
extern "C" void __declspec(naked) asmLoad16bIA (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext)
{
_asm {
ALIGN 4
// manual prologue (naked function)
push ebp
mov ebp, esp
push ebx
push esi
push edi
mov esi,[src] // esi = read pointer
mov edi,[dst] // edi = write pointer
mov ecx,[height] // ecx = rows remaining
y_loop:
push ecx // keep the row counter while ecx counts groups
mov ecx,[wid_64]
// first row of the pair: straight copy
x_loop:
mov eax,[esi] // read both pixels
mov ebx,[esi+4] // read both pixels
mov [edi],eax
mov [edi+4],ebx
add esi,8
add edi,8
dec ecx
jnz x_loop
pop ecx
dec ecx
jz end_y_loop // the first row of the pair was the last row
push ecx
add esi,[line]
add edi,[ext]
mov ecx,[wid_64]
// second row of the pair: the two dwords of each group are swapped
x_loop_2:
mov eax,[esi+4] // read both pixels
mov ebx,[esi] // read both pixels
mov [edi],eax
mov [edi+4],ebx
add esi,8
add edi,8
dec ecx
jnz x_loop_2
add esi,[line]
add edi,[ext]
pop ecx
dec ecx
jnz y_loop
end_y_loop:
// manual epilogue
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret
}
}
//**************************************************************** //****************************************************************
// Size: 2, Format: 0 // Size: 2, Format: 0

File diff suppressed because it is too large Load Diff

View File

@ -37,10 +37,630 @@
// //
//**************************************************************** //****************************************************************
// asmLoad8bCI: expands rows of 8-bit palette indices into 16-bit pixels.
//   src    - address of the source indices
//   dst    - address of the destination pixels
//   wid_64 - number of 8-byte groups (eight indices) per row
//   height - number of rows
//   line   - bytes added to the source position after each row
//   ext    - bytes added to the destination pointer after each row
//   pal    - address of a table of 16-bit entries, indexed by the 8-bit value
// Each index selects a 16-bit palette entry, which is rotated right by one bit
// and stored; two converted pixels are packed per output dword, the earlier
// source byte in the low half.
// Rows are handled in pairs: in the second row of each pair the two dwords of
// every 8-byte group are read in swapped order.
// The source position wraps inside a 2 KB window that starts at src
// (src + (offset & 0x7FF)) after every row, and after every group in the second row.
// NOTE(review): wid_64 and height are assumed to be non-zero - confirm against the caller.
extern "C" void asmLoad8bCI (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext, wxUIntPtr pal); extern "C" void __declspec(naked) asmLoad8bCI (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext, wxUIntPtr pal)
extern "C" void asmLoad8bIA8 (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext, wxUIntPtr pal); {
extern "C" void asmLoad8bIA4 (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext); _asm {
extern "C" void asmLoad8bI (wxUIntPtr src, int dst, wxUIntPtr wid_64, int height, int line, int ext); push ebp
// manual prologue (naked function), continued
mov ebp, esp
push ebx
push esi
push edi
mov ebx,[pal] // ebx = palette base
mov esi,[src] // esi = read pointer
mov edi,[dst] // edi = write pointer
mov ecx,[height] // ecx = rows remaining
y_loop:
push ecx // keep the row counter while ecx counts groups
mov ecx,[wid_64]
// first row of the pair: dwords are read in memory order
x_loop:
push ecx // cx is used as a scratch register for palette entries below
mov eax,[esi] // read all 4 pixels
bswap eax
add esi,4
mov edx,eax
// 1st dword output {
// (value >> n) & 0x1FE yields index*2, the byte offset of a 16-bit entry
shr eax,15
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
mov eax,edx
shr eax,23
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
mov [edi],ecx
add edi,4
// }
// 2nd dword output {
mov eax,edx
shl eax,1
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
shr edx,7
and edx,0x1FE
mov cx,[ebx+edx]
ror cx,1
mov [edi],ecx
add edi,4
// }
// * copy
mov eax,[esi] // read all 4 pixels
bswap eax
add esi,4
mov edx,eax
// 1st dword output {
shr eax,15
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
mov eax,edx
shr eax,23
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
mov [edi],ecx
add edi,4
// }
// 2nd dword output {
mov eax,edx
shl eax,1
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
shr edx,7
and edx,0x1FE
mov cx,[ebx+edx]
ror cx,1
mov [edi],ecx
add edi,4
// }
// *
pop ecx
dec ecx
jnz x_loop
pop ecx
dec ecx
jz near end_y_loop // the first row of the pair was the last row
push ecx
// esi = src + ((esi + line - src) & 0x7FF)
mov eax,esi
add eax,[line]
mov esi,[src]
sub eax,esi
and eax,0x7FF
add esi,eax
add edi,[ext]
mov ecx,[wid_64]
// second row of the pair: the two dwords of each group are read swapped
x_loop_2:
push ecx // cx is used as a scratch register for palette entries below
mov eax,[esi+4] // read all 4 pixels
bswap eax
mov edx,eax
// 1st dword output {
shr eax,15
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
mov eax,edx
shr eax,23
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
mov [edi],ecx
add edi,4
// }
// 2nd dword output {
mov eax,edx
shl eax,1
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
shr edx,7
and edx,0x1FE
mov cx,[ebx+edx]
ror cx,1
mov [edi],ecx
add edi,4
// }
// * copy
mov eax,[esi] // read all 4 pixels
bswap eax
// esi = src + ((esi + 8 - src) & 0x7FF): step to the next group with wrap-around
mov edx,esi
add edx,8
mov esi,[src]
sub edx,esi
and edx,0x7FF
add esi,edx
mov edx,eax
// 1st dword output {
shr eax,15
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
mov eax,edx
shr eax,23
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
mov [edi],ecx
add edi,4
// }
// 2nd dword output {
mov eax,edx
shl eax,1
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,1
shl ecx,16
shr edx,7
and edx,0x1FE
mov cx,[ebx+edx]
ror cx,1
mov [edi],ecx
add edi,4
// }
// *
pop ecx
dec ecx
jnz x_loop_2
// esi = src + ((esi + line - src) & 0x7FF)
mov eax,esi
add eax,[line]
mov esi,[src]
sub eax,esi
and eax,0x7FF
add esi,eax
add edi,[ext]
pop ecx
dec ecx
jnz y_loop
end_y_loop:
// manual epilogue
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret
}
}
// asmLoad8bIA8: expands rows of 8-bit palette indices into 16-bit pixels.
//   src    - address of the source indices
//   dst    - address of the destination pixels
//   wid_64 - number of 8-byte groups (eight indices) per row
//   height - number of rows
//   line   - bytes added to the source pointer after each row
//   ext    - bytes added to the destination pointer after each row
//   pal    - address of a table of 16-bit entries, indexed by the 8-bit value
// Each index selects a 16-bit palette entry whose two bytes are exchanged
// (ror cx,8) before it is stored; two converted pixels are packed per output
// dword, the earlier source byte in the low half.
// Rows are handled in pairs: in the second row of each pair the two dwords of
// every 8-byte group are read in swapped order.
// NOTE(review): wid_64 and height are assumed to be non-zero - confirm against the caller.
extern "C" void __declspec(naked) asmLoad8bIA8 (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext, wxUIntPtr pal)
{
_asm {
// manual prologue (naked function)
push ebp
mov ebp, esp
push ebx
push esi
push edi
mov ebx,[pal] // ebx = palette base
mov esi,[src] // esi = read pointer
mov edi,[dst] // edi = write pointer
mov ecx,[height] // ecx = rows remaining
y_loop:
push ecx // keep the row counter while ecx counts groups
mov ecx,[wid_64]
// first row of the pair: dwords are read in memory order
x_loop:
push ecx // cx is used as a scratch register for palette entries below
mov eax,[esi] // read all 4 pixels
bswap eax
add esi,4
mov edx,eax
// 1st dword output {
// (value >> n) & 0x1FE yields index*2, the byte offset of a 16-bit entry
shr eax,15
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
mov eax,edx
shr eax,23
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
mov [edi],ecx
add edi,4
// }
// 2nd dword output {
mov eax,edx
shl eax,1
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
shr edx,7
and edx,0x1FE
mov cx,[ebx+edx]
ror cx,8
mov [edi],ecx
add edi,4
// }
// * copy
mov eax,[esi] // read all 4 pixels
bswap eax
add esi,4
mov edx,eax
// 1st dword output {
shr eax,15
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
mov eax,edx
shr eax,23
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
mov [edi],ecx
add edi,4
// }
// 2nd dword output {
mov eax,edx
shl eax,1
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
shr edx,7
and edx,0x1FE
mov cx,[ebx+edx]
ror cx,8
mov [edi],ecx
add edi,4
// }
// *
pop ecx
dec ecx
jnz x_loop
pop ecx
dec ecx
jz near end_y_loop // the first row of the pair was the last row
push ecx
add esi,[line]
add edi,[ext]
mov ecx,[wid_64]
// second row of the pair: the two dwords of each group are read swapped
x_loop_2:
push ecx // cx is used as a scratch register for palette entries below
mov eax,[esi+4] // read all 4 pixels
bswap eax
mov edx,eax
// 1st dword output {
shr eax,15
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
mov eax,edx
shr eax,23
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
mov [edi],ecx
add edi,4
// }
// 2nd dword output {
mov eax,edx
shl eax,1
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
shr edx,7
and edx,0x1FE
mov cx,[ebx+edx]
ror cx,8
mov [edi],ecx
add edi,4
// }
// * copy
mov eax,[esi] // read all 4 pixels
bswap eax
add esi,8 // both dwords of this group are now consumed
mov edx,eax
// 1st dword output {
shr eax,15
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
mov eax,edx
shr eax,23
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
mov [edi],ecx
add edi,4
// }
// 2nd dword output {
mov eax,edx
shl eax,1
and eax,0x1FE
mov cx,[ebx+eax]
ror cx,8
shl ecx,16
shr edx,7
and edx,0x1FE
mov cx,[ebx+edx]
ror cx,8
mov [edi],ecx
add edi,4
// }
// *
pop ecx
dec ecx
jnz x_loop_2
add esi,[line]
add edi,[ext]
pop ecx
dec ecx
jnz y_loop
end_y_loop:
// manual epilogue
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret
}
}
// asmLoad8bIA4: copies rows of 8-bit pixels from src to dst, exchanging the
// high and low nibble of every byte.
//   src    - address of the source data
//   dst    - address of the destination data
//   wid_64 - number of 8-byte groups per row
//   height - number of rows
//   line   - bytes added to the source pointer after each row
//   ext    - bytes added to the destination pointer after each row
// Rows are handled in pairs: in the second row of each pair the two dwords of
// every 8-byte group are read in swapped order.
// NOTE(review): wid_64 and height are assumed to be non-zero - confirm against the caller.
extern "C" void __declspec(naked) asmLoad8bIA4 (wxUIntPtr src, wxUIntPtr dst, int wid_64, int height, int line, int ext)
{
_asm {
// manual prologue (naked function)
push ebp
mov ebp, esp
push ebx
push esi
push edi
mov esi,[src] // esi = read pointer
mov edi,[dst] // edi = write pointer
mov ecx,[height] // ecx = rows remaining
y_loop:
push ecx // keep the row counter while ecx counts groups
mov ecx,[wid_64]
// first row of the pair: dwords are read in memory order
x_loop:
mov eax,[esi] // read all 4 pixels
mov edx,eax
// result = ((v >> 4) & 0x0F0F0F0F) | ((v << 4) & 0xF0F0F0F0)
shr eax,4 //all alpha
shl edx,4
and eax,0x0F0F0F0F
and edx,0xF0F0F0F0
add esi,4
or eax,edx
mov [edi],eax // save dword
add edi,4
mov eax,[esi] // read all 4 pixels
mov edx,eax
shr eax,4 //all alpha
shl edx,4
and eax,0x0F0F0F0F
and edx,0xF0F0F0F0
add esi,4
or eax,edx
mov [edi],eax // save dword
add edi,4
// *
dec ecx
jnz x_loop
pop ecx
dec ecx
jz end_y_loop // the first row of the pair was the last row
push ecx
add esi,[line]
add edi,[ext]
mov ecx,[wid_64]
// second row of the pair: the two dwords of each group are read swapped
x_loop_2:
mov eax,[esi+4] // read both pixels
mov edx,eax
shr eax,4 //all alpha
shl edx,4
and eax,0x0F0F0F0F
and edx,0xF0F0F0F0
or eax,edx
mov [edi],eax //save dword
add edi,4
mov eax,[esi] // read both pixels
add esi,8
mov edx,eax
shr eax,4 //all alpha
shl edx,4
and eax,0x0F0F0F0F
and edx,0xF0F0F0F0
or eax,edx
mov [edi],eax //save dword
add edi,4
// *
dec ecx
jnz x_loop_2
add esi,[line]
add edi,[ext]
pop ecx
dec ecx
jnz y_loop
end_y_loop:
// manual epilogue
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret
}
}
// asmLoad8bI: copies rows of 8-bit pixels from src to dst without changing the pixel data.
//   src    - address of the source data
//   dst    - address of the destination data (declared int, used as an address)
//   wid_64 - number of 8-byte groups per row (declared wxUIntPtr, used as a count)
//   height - number of rows
//   line   - bytes added to the source pointer after each row
//   ext    - bytes added to the destination pointer after each row
// Rows are handled in pairs: the first row is copied as is; in the second row
// the two dwords of every 8-byte group are written in swapped order.
// NOTE(review): wid_64 and height are assumed to be non-zero - confirm against the caller.
extern "C" void __declspec(naked) asmLoad8bI (wxUIntPtr src, int dst, wxUIntPtr wid_64, int height, int line, int ext)
{
_asm {
// manual prologue (naked function)
push ebp
mov ebp, esp
push ebx
push esi
push edi
mov esi,[src] // esi = read pointer
mov edi,[dst] // edi = write pointer
mov ecx,[height] // ecx = rows remaining
y_loop:
push ecx // keep the row counter while ecx counts groups
mov ecx,[wid_64]
// first row of the pair: straight copy
x_loop:
mov eax,[esi] // read all 4 pixels
add esi,4
mov [edi],eax // save dword
add edi,4
mov eax,[esi] // read all 4 pixels
add esi,4
mov [edi],eax // save dword
add edi,4
// *
dec ecx
jnz x_loop
pop ecx
dec ecx
jz end_y_loop // the first row of the pair was the last row
push ecx
add esi,[line]
add edi,[ext]
mov ecx,[wid_64]
// second row of the pair: the two dwords of each group are swapped
x_loop_2:
mov eax,[esi+4] // read both pixels
mov [edi],eax //save dword
add edi,4
mov eax,[esi] // read both pixels
add esi,8
mov [edi],eax //save dword
add edi,4
// *
dec ecx
jnz x_loop_2
add esi,[line]
add edi,[ext]
pop ecx
dec ecx
jnz y_loop
end_y_loop:
// manual epilogue
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret
}
}
//**************************************************************** //****************************************************************
// Size: 1, Format: 2 // Size: 1, Format: 2

File diff suppressed because it is too large Load Diff

View File

@ -48,8 +48,38 @@
#include "FBtoScreen.h" #include "FBtoScreen.h"
#include "CRC.h" #include "CRC.h"
// SwapBlock32: register-argument helper, meant to be called from other asm
// routines rather than from C++ (it takes no stack arguments).
// On return eax and ebx are preserved, ecx is 0 and edi points just past the
// last pair that was swapped. A count of zero is accepted and does nothing.
extern "C" void SwapBlock32 (); extern "C" __declspec(naked) void SwapBlock32 ( void )
extern "C" void SwapBlock64 (); {
//****************************************************************
// SwapBlock - swaps every other 32-bit word at addr
//
// ecx = num_words -> 0
// edi = addr -> end of dest
//****************************************************************
_asm {
align 4
// manual prologue (naked function); eax/ebx are the only scratch registers
push ebp
mov ebp, esp
push eax
push ebx
or ecx,ecx // sets ZF when the count is zero
jz swapblock32_end
swapblock32_loop:
// exchange the two dwords of one 8-byte unit
mov eax,[edi]
mov ebx,[edi+4]
mov [edi],ebx
mov [edi+4],eax
add edi,8
dec ecx
jnz swapblock32_loop
swapblock32_end:
// manual epilogue
pop ebx
pop eax
mov esp, ebp
pop ebp
ret
}
}
const int NumOfFormats = 3; const int NumOfFormats = 3;
SCREEN_SHOT_FORMAT ScreenShotFormats[NumOfFormats] = { {wxT("BMP"), wxT("bmp"), wxBITMAP_TYPE_BMP}, {wxT("PNG"), wxT("png"), wxBITMAP_TYPE_PNG}, {wxT("JPEG"), wxT("jpeg"), wxBITMAP_TYPE_JPEG} }; SCREEN_SHOT_FORMAT ScreenShotFormats[NumOfFormats] = { {wxT("BMP"), wxT("bmp"), wxBITMAP_TYPE_BMP}, {wxT("PNG"), wxT("png"), wxBITMAP_TYPE_PNG}, {wxT("JPEG"), wxT("jpeg"), wxBITMAP_TYPE_JPEG} };
@ -1824,7 +1854,168 @@ void setTBufTex(wxUint16 t_mem, wxUint32 cnt)
} }
} }
// CopyBlock: register-argument helper, called from the asm loaders in this file
// (it takes no stack arguments).
//   esi = base address of the source
//   edx = byte offset into the source (need not be dword aligned)
//   edi = destination address
//   ecx = number of 8-byte units to copy; zero is accepted and copies nothing
// Source dwords are byte-swapped while being copied. When the offset is not a
// multiple of four, the leading bytes of the first source dword are skipped and
// the missing bytes are taken from one extra dword at the end.
// On return eax, ebx, esi and edx are preserved, edi points just past the last
// byte written, and ecx is not preserved in general.
extern "C" void asmLoadBlock(int src, int dst, int off, int dxt, int cnt, int swp); void __declspec(naked) CopyBlock ( void )
{
_asm {
align 4
// manual prologue (naked function)
push ebp
mov ebp, esp
push eax
push ebx
push esi
push edx
or ecx,ecx // sets ZF when the count is zero
jz near copyblock_end
push ecx // original count, base and offset are needed again after the main copy
// first, set the source address and check if not on a dword boundary
push esi
push edx
mov ebx,edx
and edx,0FFFFFFFCh // offset rounded down to a dword boundary
add esi,edx
and ebx,3 // ebx = # we DON'T need to copy
jz copyblock_copy
mov edx,4 // edx = # we DO need to copy
sub edx,ebx
// load the first word, accounting for swapping
mov eax,[esi]
add esi,4
copyblock_precopy_skip:
// rotate the unwanted leading bytes out of the way
rol eax,8
dec ebx
jnz copyblock_precopy_skip
copyblock_precopy_copy:
// emit the remaining bytes of the first dword one at a time
rol eax,8
mov [edi],al
inc edi
dec edx
jnz copyblock_precopy_copy
// second half of the first 8-byte unit
mov eax,[esi]
add esi,4
bswap eax
mov [edi],eax
add edi,4
dec ecx // 1 less word to copy
jz copyblock_postcopy
copyblock_copy:
// main loop: one 8-byte unit (two byte-swapped dwords) per iteration
mov eax,[esi]
bswap eax
mov [edi],eax
mov eax,[esi+4]
bswap eax
mov [edi+4],eax
add esi,8
add edi,8
dec ecx
jnz copyblock_copy
copyblock_postcopy:
// restore the original offset, base and count
pop edx
pop esi
pop ecx
// check again if on dword boundary
mov ebx,edx // ebx = # we DO need to copy
and ebx,3
jz copyblock_end
shl ecx,3 // ecx = num_words * 8
// esi = base + ((offset + bytes copied) rounded down to a dword boundary)
add edx,ecx
and edx,0FFFFFFFCh
add esi,edx
mov eax,[esi]
copyblock_postcopy_copy:
// emit the leading bytes of the trailing dword one at a time
rol eax,8
mov [edi],al
inc edi
dec ebx
jnz copyblock_postcopy_copy
copyblock_end:
// manual epilogue
pop edx
pop esi
pop ebx
pop eax
mov esp, ebp
pop ebp
ret
}
}
// asmLoadBlock: copies a block with CopyBlock, then swaps selected 8-byte units
// of the destination by calling a swap routine.
//   src - base address of the source
//   dst - destination address
//   off - byte offset into the source
//   dxt - increment added to a running counter for every 8-byte unit
//   cnt - number of 8-byte units
//   swp - address of the swap routine; it is called with ecx = number of units
//         and edi = address of the first unit (register arguments, not stack)
// After the copy, the destination is walked one unit at a time while `dxt` is
// accumulated in edx. Units are skipped while the accumulator's sign bit is
// clear and counted while it is set; each counted run is handed to `swp`.
// NOTE(review): the walk relies on `swp` advancing edi past the swapped units
// and leaving ecx at 0 (SwapBlock32 in this file does) - confirm for other routines.
// NOTE(review): the walk tests its counter only after the first decrement, so
// cnt is assumed to be non-zero - confirm against the caller.
extern "C" __declspec(naked) void asmLoadBlock(int src, int dst, int off, int dxt, int cnt, wxUIntPtr swp)
{
_asm {
align 4
// manual prologue (naked function)
push ebp
mov ebp, esp
push ebx
push esi
push edi
// copy the data
mov esi,[src]
mov edi,[dst]
mov ecx,[cnt]
mov edx,[off]
call CopyBlock
// now swap it
mov eax,[cnt] // eax = count remaining
xor edx,edx // edx = dxt counter
mov edi,[dst]
mov ebx,[dxt]
xor ecx,ecx // ecx = how much to copy
dxt_test:
// skip units while the accumulator stays non-negative
add edi,8
dec eax
jz end_dxt_test
add edx,ebx
jns dxt_test
dxt_s_test:
// count units while the accumulator is negative
inc ecx
dec eax
jz end_dxt_test
add edx,ebx
js dxt_s_test
// swap this data (ecx set, dst set)
call [swp] // (ecx reset to 0 after)
jmp dxt_test // and repeat
end_dxt_test:
// swap any remaining data
call [swp]
// manual epilogue
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret
}
}
void LoadBlock32b(wxUint32 tile, wxUint32 ul_s, wxUint32 ul_t, wxUint32 lr_s, wxUint32 dxt); void LoadBlock32b(wxUint32 tile, wxUint32 ul_s, wxUint32 ul_t, wxUint32 lr_s, wxUint32 dxt);
static void rdp_loadblock() static void rdp_loadblock()
{ {
@ -1916,7 +2107,65 @@ static void rdp_loadblock()
setTBufTex(rdp.tiles[tile].t_mem, cnt); setTBufTex(rdp.tiles[tile].t_mem, cnt);
} }
// asmLoadTile: copies `height` rows into the destination with CopyBlock and
// swaps the 32-bit word pairs of every second row with SwapBlock32.
//   src    - base address of the source
//   dst    - destination address of the first row
//   width  - number of 8-byte units per row (passed to CopyBlock in ecx)
//   height - number of rows
//   line   - bytes added to the source offset after each row
//   off    - byte offset of the first row inside the source
//   end    - highest destination address at which a row may still start;
//            the loop stops as soon as the destination pointer is above it
// The first row is left unswapped, the second is swapped, and so on.
// NOTE(review): height is tested only after the first row, so it is assumed
// to be non-zero - confirm against the caller.
extern "C" void asmLoadTile(int src, int dst, int width, int height, int line, int off, int end); extern "C" __declspec(naked) void asmLoadTile(int src, int dst, int width, int height, int line, int off, int end)
{
_asm {
align 4
// manual prologue (naked function)
push ebp
mov ebp, esp
push ebx
push esi
push edi
// set initial values
mov edi,[dst]
mov ecx,[width]
mov esi,[src]
mov edx,[off]
xor ebx,ebx // swap this line?
mov eax,[height]
loadtile_loop:
cmp [end],edi // end of tmem: error
jc loadtile_end
// copy this line
push edi // remember where this row starts in the destination
push ecx // CopyBlock does not preserve ecx
call CopyBlock
pop ecx
// swap it?
xor ebx,1 // toggles every row; a result of 0 means "swap this row"
jnz loadtile_no_swap
// (ecx set, restore edi)
pop edi
push ecx // SwapBlock32 counts ecx down to 0
call SwapBlock32
pop ecx
jmp loadtile_swap_end
loadtile_no_swap:
// Drop the saved edi with a full 32-bit adjustment. The 16-bit form
// (add sp,4) only changes the low word of ESP and would corrupt the stack
// pointer if that word wrapped.
add esp,4 // forget edi, we are already at the next position
loadtile_swap_end:
add edx,[line]
dec eax
jnz loadtile_loop
loadtile_end:
// manual epilogue
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret
}
}
void LoadTile32b (wxUint32 tile, wxUint32 ul_s, wxUint32 ul_t, wxUint32 width, wxUint32 height); void LoadTile32b (wxUint32 tile, wxUint32 ul_s, wxUint32 ul_t, wxUint32 width, wxUint32 height);
static void rdp_loadtile() static void rdp_loadtile()
{ {