[Glide64] Remove asm from TextConv.h

This commit is contained in:
zilmar 2015-10-14 11:38:47 +11:00
parent 3d52a163cd
commit dd1c306d16
2 changed files with 115 additions and 312 deletions

View File

@ -149,57 +149,28 @@ void ClearCache ()
}
//****************************************************************
// GetTexInfo - gets information for either t0 or t1, checks if in cache & fills tex_found
extern "C" __declspec(naked) int asmTextureCRC(int addr, int width, int height, int line)
uint32_t textureCRC(uint8_t *addr, int width, int height, int line)
{
_asm {
align 4
push ebp
mov ebp, esp
uint32_t crc = 0;
uint32_t *pixelpos;
unsigned int i;
uint64_t twopixel_crc;
push ebx
push edi
xor eax,eax // eax is final result
mov ebx,[line]
mov ecx,[height] // ecx is height counter
mov edi,[addr] // edi is ptr to texture memory
crc_loop_y:
push ecx
mov ecx,[width]
crc_loop_x:
add eax,[edi] // MUST be 64-bit aligned, so manually unroll
add eax,[edi+4]
mov edx,ecx
mul edx
add eax,edx
add edi,8
dec ecx
jnz crc_loop_x
pop ecx
mov edx,ecx
mul edx
add eax,edx
add edi,ebx
dec ecx
jnz crc_loop_y
pop edi
pop ebx
mov esp, ebp
pop ebp
ret
pixelpos = (uint32_t*)addr;
for (; height; height--) {
for (i = width; i; --i) {
twopixel_crc = i * (uint64_t)(pixelpos[1] + pixelpos[0] + crc);
crc = (uint32_t) ((twopixel_crc >> 32) + twopixel_crc);
pixelpos += 2;
}
crc = ((unsigned int)height * (uint64_t)crc >> 32) + height * crc;
pixelpos = (uint32_t *)((char *)pixelpos + line);
}
return crc;
}
// GetTexInfo - gets information for either t0 or t1, checks if in cache & fills tex_found
void GetTexInfo (int id, int tile)
{
FRDP (" | |-+ GetTexInfo (id: %d, tile: %d)\n", id, tile);
@ -388,17 +359,17 @@ void GetTexInfo (int id, int tile)
{
line = (line - wid_64) << 3;
if (wid_64 < 1) wid_64 = 1;
wxUIntPtr addr = wxPtrToUInt(rdp.tmem) + (rdp.tiles[tile].t_mem<<3);
uint8_t * addr = (((uint8_t*)rdp.tmem) + (rdp.tiles[tile].t_mem<<3));
if (crc_height > 0) // Check the CRC
{
if (rdp.tiles[tile].size < 3)
crc = asmTextureCRC(addr, wid_64, crc_height, line);
crc = textureCRC(addr, wid_64, crc_height, line);
else //32b texture
{
int line_2 = line >> 1;
int wid_64_2 = max(1, wid_64 >> 1);
crc = asmTextureCRC(addr, wid_64_2, crc_height, line_2);
crc += asmTextureCRC(addr+0x800, wid_64_2, crc_height, line_2);
crc = textureCRC(addr, wid_64_2, crc_height, line_2);
crc += textureCRC(addr+0x800, wid_64_2, crc_height, line_2);
}
}
}
@ -1087,8 +1058,8 @@ void LoadTex (int id, int tmu)
cache->f_mirror_t = FALSE;
cache->f_wrap_s = FALSE;
cache->f_wrap_t = FALSE;
cache->is_hires_tex = FALSE;
#ifdef TEXTURE_FILTER
cache->is_hires_tex = FALSE;
cache->ricecrc = texinfo[id].ricecrc;
#endif
@ -1534,22 +1505,22 @@ void LoadTex (int id, int tmu)
// Convert the texture to ARGB 4444
if (LOWORD(result) == GR_TEXFMT_ARGB_1555)
{
TexConv_ARGB1555_ARGB4444 (wxPtrToUInt(texture), wxPtrToUInt(tex2), real_x, real_y);
TexConv_ARGB1555_ARGB4444 ((texture), (tex2), real_x, real_y);
texture = tex2;
}
else if (LOWORD(result) == GR_TEXFMT_ALPHA_INTENSITY_88)
{
TexConv_AI88_ARGB4444 (wxPtrToUInt(texture), wxPtrToUInt(tex2), real_x, real_y);
TexConv_AI88_ARGB4444 ((texture), (tex2), real_x, real_y);
texture = tex2;
}
else if (LOWORD(result) == GR_TEXFMT_ALPHA_INTENSITY_44)
{
TexConv_AI44_ARGB4444 (wxPtrToUInt(texture), wxPtrToUInt(tex2), real_x, real_y);
TexConv_AI44_ARGB4444 ((texture), (tex2), real_x, real_y);
texture = tex2;
}
else if (LOWORD(result) == GR_TEXFMT_ALPHA_8)
{
TexConv_A8_ARGB4444 (wxPtrToUInt(texture), wxPtrToUInt(tex2), real_x, real_y);
TexConv_A8_ARGB4444 ((texture), (tex2), real_x, real_y);
texture = tex2;
}
/*else if (LOWORD(result) == GR_TEXFMT_ARGB_4444)

View File

@ -37,299 +37,131 @@
//
//****************************************************************
extern "C" void __declspec(naked) asmTexConv_ARGB1555_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int isize)
static inline void texConv_ARGB1555_ARGB4444(uint8_t *src, uint8_t *dst, int size)
{
_asm {
align 4
push ebp
mov ebp, esp
push ebx
push esi
push edi
uint32_t *v3;
uint32_t *v4;
int v5;
uint32_t v6;
uint32_t v7;
mov esi,[src]
mov edi,[dst]
mov ecx,[isize]
tc1_loop:
mov eax,[esi]
add esi,4
// arrr rrgg gggb bbbb
// aaaa rrrr gggg bbbb
mov edx,eax
and eax,0x80008000
mov ebx,eax // ebx = 0xa000000000000000
shr eax,1
or ebx,eax // ebx = 0xaa00000000000000
shr eax,1
or ebx,eax // ebx = 0xaaa0000000000000
shr eax,1
or ebx,eax // ebx = 0xaaaa000000000000
mov eax,edx
and eax,0x78007800 // eax = 0x0rrrr00000000000
shr eax,3 // eax = 0x0000rrrr00000000
or ebx,eax // ebx = 0xaaaarrrr00000000
mov eax,edx
and eax,0x03c003c0 // eax = 0x000000gggg000000
shr eax,2 // eax = 0x00000000gggg0000
or ebx,eax // ebx = 0xaaaarrrrgggg0000
and edx,0x001e001e // edx = 0x00000000000bbbb0
shr edx,1 // edx = 0x000000000000bbbb
or ebx,edx // ebx = 0xaaaarrrrggggbbbb
mov [edi],ebx
add edi,4
dec ecx
jnz tc1_loop
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret
}
}
extern "C" void __declspec(naked) asmTexConv_AI88_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int isize)
v3 = (uint32_t *)src;
v4 = (uint32_t *)dst;
v5 = size;
do
{
_asm {
align 4
push ebp
mov ebp, esp
push ebx
push esi
push edi
mov esi,[src]
mov edi,[dst]
mov ecx,[isize]
tc1_loop:
mov eax,[esi]
add esi,4
// aaaa aaaa iiii iiii
// aaaa rrrr gggg bbbb
mov edx,eax
and eax,0xF000F000 // eax = 0xaaaa000000000000
mov ebx,eax // ebx = 0xaaaa000000000000
and edx,0x00F000F0 // edx = 0x00000000iiii0000
shl edx,4 // edx = 0x0000iiii00000000
or ebx,edx // ebx = 0xaaaaiiii00000000
shr edx,4 // edx = 0x00000000iiii0000
or ebx,edx // ebx = 0xaaaaiiiiiiii0000
shr edx,4 // edx = 0x000000000000iiii
or ebx,edx // ebx = 0xaaaaiiiiiiiiiiii
mov [edi],ebx
add edi,4
dec ecx
jnz tc1_loop
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret
v6 = *v3;
++v3;
v7 = v6;
*v4 = ((v7 & 0x1E001E) >> 1) | ((v6 & 0x3C003C0) >> 2) | ((v6 & 0x78007800) >> 3) | ((v6 & 0x80008000) >> 3) | ((v6 & 0x80008000) >> 2) | ((v6 & 0x80008000) >> 1) | (v6 & 0x80008000);
++v4;
--v5;
}
while ( v5 );
}
extern "C" void __declspec(naked) asmTexConv_AI44_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int isize)
static inline void texConv_AI88_ARGB4444(uint8_t *src, uint8_t *dst, int size)
{
_asm {
align 4
push ebp
mov ebp, esp
push ebx
push esi
push edi
uint32_t *v3;
uint32_t *v4;
int v5;
uint32_t v6;
uint32_t v7;
mov esi,[src]
mov edi,[dst]
mov ecx,[isize]
tc1_loop:
mov eax,[esi]
add esi,4
// aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0
// aaaa1 rrrr1 gggg1 bbbb1 aaaa0 rrrr0 gggg0 bbbb0
// aaaa3 rrrr3 gggg3 bbbb3 aaaa2 rrrr2 gggg2 bbbb2
mov edx,eax // eax = aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0
shl eax,16 // eax = aaaa1 iiii1 aaaa0 iiii0 0000 0000 0000 0000
and eax,0xFF000000 // eax = aaaa1 iiii1 0000 0000 0000 0000 0000 0000
mov ebx,eax // ebx = aaaa1 iiii1 0000 0000 0000 0000 0000 0000
and eax,0x0F000000 // eax = 0000 iiii1 0000 0000 0000 0000 0000 0000
shr eax,4 // eax = 0000 0000 iiii1 0000 0000 0000 0000 0000
or ebx,eax // ebx = aaaa1 iiii1 iiii1 0000 0000 0000 0000 0000
shr eax,4 // eax = 0000 0000 0000 iiii1 0000 0000 0000 0000
or ebx,eax // ebx = aaaa1 iiii1 iiii1 iiii1 0000 0000 0000 0000
mov eax,edx // eax = aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0
shl eax,8 // eax = aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0 0000 0000
and eax,0x0000FF00 // eax = 0000 0000 0000 0000 aaaa0 iiii0 0000 0000
or ebx,eax // ebx = aaaa1 iiii1 iiii1 iiii1 aaaa0 iiii0 0000 0000
and eax,0x00000F00 // eax = 0000 0000 0000 0000 0000 iiii0 0000 0000
shr eax,4 // eax = 0000 0000 0000 0000 0000 0000 iiii0 0000
or ebx,eax // ebx = aaaa1 iiii1 iiii1 iiii1 aaaa0 iiii0 iiii0 0000
shr eax,4 // eax = 0000 0000 0000 0000 0000 0000 0000 iiii0
or ebx,eax // ebx = aaaa1 iiii1 iiii1 iiii1 aaaa0 iiii0 iiii0 iiii0
mov [edi],ebx
add edi,4
mov eax,edx // eax = aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0
and eax,0xFF000000 // eax = aaaa3 iiii3 0000 0000 0000 0000 0000 0000
mov ebx,eax // ebx = aaaa3 iiii3 0000 0000 0000 0000 0000 0000
and eax,0x0F000000 // eax = 0000 iiii3 0000 0000 0000 0000 0000 0000
shr eax,4 // eax = 0000 0000 iiii3 0000 0000 0000 0000 0000
or ebx,eax // ebx = aaaa3 iiii3 iiii3 0000 0000 0000 0000 0000
shr eax,4 // eax = 0000 0000 0000 iiii3 0000 0000 0000 0000
or ebx,eax // ebx = aaaa3 iiii3 iiii3 iiii3 0000 0000 0000 0000
// edx = aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0
shr edx,8 // edx = 0000 0000 aaaa3 aaaa3 aaaa2 iiii2 aaaa1 iiii1
and edx,0x0000FF00 // edx = 0000 0000 0000 0000 aaaa2 iiii2 0000 0000
or ebx,edx // ebx = aaaa3 iiii3 iiii3 iiii3 aaaa2 iiii2 0000 0000
and edx,0x00000F00 // edx = 0000 0000 0000 0000 0000 iiii2 0000 0000
shr edx,4 // edx = 0000 0000 0000 0000 0000 0000 iiii2 0000
or ebx,edx // ebx = aaaa3 iiii3 iiii3 iiii3 aaaa2 iiii2 iiii2 0000
shr edx,4 // edx = 0000 0000 0000 0000 0000 0000 0000 iiii2
or ebx,edx // ebx = aaaa3 iiii3 iiii3 iiii3 aaaa2 iiii2 iiii2 iiii2
mov [edi],ebx
add edi,4
dec ecx
jnz tc1_loop
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret
}
}
extern "C" void __declspec(naked) asmTexConv_A8_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int isize)
v3 = (uint32_t *)src;
v4 = (uint32_t *)dst;
v5 = size;
do
{
_asm {
align 4
push ebp
mov ebp, esp
push ebx
push esi
push edi
mov esi,[src]
mov edi,[dst]
mov ecx,[isize]
tc1_loop:
mov eax,[esi]
add esi,4
// aaaa3 aaaa3 aaaa2 aaaa2 aaaa1 aaaa1 aaaa0 aaaa0
// aaaa1 rrrr1 gggg1 bbbb1 aaaa0 rrrr0 gggg0 bbbb0
// aaaa3 rrrr3 gggg3 bbbb3 aaaa2 rrrr2 gggg2 bbbb2
mov edx,eax
and eax,0x0000F000 // eax = 00 00 00 00 a1 00 00 00
shl eax,16 // eax = a1 00 00 00 00 00 00 00
mov ebx,eax // ebx = a1 00 00 00 00 00 00 00
shr eax,4
or ebx,eax // ebx = a1 a1 00 00 00 00 00 00
shr eax,4
or ebx,eax // ebx = a1 a1 a1 00 00 00 00 00
shr eax,4
or ebx,eax // ebx = a1 a1 a1 a1 00 00 00 00
mov eax,edx
and eax,0x000000F0 // eax = 00 00 00 00 00 00 a0 00
shl eax,8 // eax = 00 00 00 00 a0 00 00 00
or ebx,eax
shr eax,4
or ebx,eax
shr eax,4
or ebx,eax
shr eax,4
or ebx,eax // ebx = a1 a1 a1 a1 a0 a0 a0 a0
mov [edi],ebx
add edi,4
mov eax,edx // eax = a3 a3 a2 a2 a1 a1 a0 a0
and eax,0xF0000000 // eax = a3 00 00 00 00 00 00 00
mov ebx,eax // ebx = a3 00 00 00 00 00 00 00
shr eax,4
or ebx,eax // ebx = a3 a3 00 00 00 00 00 00
shr eax,4
or ebx,eax // ebx = a3 a3 a3 00 00 00 00 00
shr eax,4
or ebx,eax // ebx = a3 a3 a3 a3 00 00 00 00
and edx,0x00F00000 // eax = 00 00 a2 00 00 00 00 00
shr edx,8 // eax = 00 00 00 00 a2 00 00 00
or ebx,edx
shr edx,4
or ebx,edx
shr edx,4
or ebx,edx
shr edx,4
or ebx,edx // ebx = a3 a3 a3 a3 a2 a2 a2 a2
mov [edi],ebx
add edi,4
dec ecx
jnz tc1_loop
pop edi
pop esi
pop ebx
mov esp, ebp
pop ebp
ret
v6 = *v3;
++v3;
v7 = v6;
*v4 = (16 * (v7 & 0xF000F0) >> 8) | (v7 & 0xF000F0) | (16 * (v7 & 0xF000F0)) | (v6 & 0xF000F000);
++v4;
--v5;
}
while ( v5 );
}
void TexConv_ARGB1555_ARGB4444 (wxUIntPtr src, wxUIntPtr dst, int width, int height)
// Expands packed AI44 texels (one byte each: alpha in the high nibble,
// intensity in the low nibble) into ARGB4444 texels. The source byte supplies
// the A and R fields and its low nibble is repeated for G and B.
//
// src  - input texels, read one 32-bit word (four texels) at a time
// dst  - output buffer; two 32-bit words (four 16-bit texels) are written for
//        every source word read
// size - number of 32-bit source words to convert
//
// A size of zero or less converts nothing. The earlier do/while form always
// executed the body once and then counted down from -1, which walked far past
// the end of both buffers.
//
// NOTE(review): both buffers are accessed through uint32_t*, so this presumably
// relies on callers passing 4-byte aligned memory - confirm against callers.
static inline void texConv_AI44_ARGB4444(uint8_t *src, uint8_t *dst, int size)
{
    const uint32_t *in = (const uint32_t *)src;
    uint32_t *out = (uint32_t *)dst;

    for (int remaining = size; remaining > 0; --remaining)
    {
        const uint32_t packed = *in++;

        // Split the word into its four AI44 texels, lowest byte first.
        const uint32_t t0 = packed & 0xFFu;
        const uint32_t t1 = (packed >> 8) & 0xFFu;
        const uint32_t t2 = (packed >> 16) & 0xFFu;
        const uint32_t t3 = (packed >> 24) & 0xFFu;

        // 0xAI -> 0xAIII: shifting the byte up by 8 places A and I in the top
        // two nibbles; multiplying the I nibble by 0x11 fills the lower two.
        const uint32_t p0 = (t0 << 8) | ((t0 & 0xFu) * 0x11u);
        const uint32_t p1 = (t1 << 8) | ((t1 & 0xFu) * 0x11u);
        const uint32_t p2 = (t2 << 8) | ((t2 & 0xFu) * 0x11u);
        const uint32_t p3 = (t3 << 8) | ((t3 & 0xFu) * 0x11u);

        // Texel order inside each output word matches the input order:
        // the lower-addressed texel lands in the low 16 bits.
        *out++ = (p1 << 16) | p0;
        *out++ = (p3 << 16) | p2;
    }
}
// Expands 8-bit texels into ARGB4444 texels by taking the high nibble of each
// source byte and repeating it in all four 4-bit fields of the output texel.
//
// src  - input texels, read one 32-bit word (four texels) at a time
// dst  - output buffer; two 32-bit words (four 16-bit texels) are written for
//        every source word read
// size - number of 32-bit source words to convert
//
// A size of zero or less converts nothing. The earlier do/while form always
// executed the body once and then counted down from -1, which walked far past
// the end of both buffers. All arithmetic is done on uint32_t so no shift is
// performed on a promoted signed int.
//
// NOTE(review): both buffers are accessed through uint32_t*, so this presumably
// relies on callers passing 4-byte aligned memory - confirm against callers.
static inline void texConv_A8_ARGB4444(uint8_t *src, uint8_t *dst, int size)
{
    const uint32_t *in = (const uint32_t *)src;
    uint32_t *out = (uint32_t *)dst;

    for (int remaining = size; remaining > 0; --remaining)
    {
        const uint32_t packed = *in++;

        // High nibble of each of the four source bytes, lowest byte first.
        const uint32_t n0 = (packed >> 4) & 0xFu;
        const uint32_t n1 = (packed >> 12) & 0xFu;
        const uint32_t n2 = (packed >> 20) & 0xFu;
        const uint32_t n3 = (packed >> 28) & 0xFu;

        // n * 0x1111 replicates the nibble into all four fields of a texel.
        // The lower-addressed texel lands in the low 16 bits of each word.
        *out++ = ((n1 * 0x1111u) << 16) | (n0 * 0x1111u);
        *out++ = ((n3 * 0x1111u) << 16) | (n2 * 0x1111u);
    }
}
// Entry point for the ARGB1555 -> ARGB4444 conversion (per the function name).
// src and dst are forwarded unchanged; the loop count handed to the converter
// is width * height halved, since two pixels are handled per iteration.
// NOTE(review): two converter calls with identical arguments appear back to
// back below - confirm the asmTexConv_* one is not a leftover from the old
// inline-asm version.
void TexConv_ARGB1555_ARGB4444 (unsigned char * src, unsigned char * dst, int width, int height)
{
int size = (width * height) >> 1; // Hiroshi Morii <koolsmoky@users.sourceforge.net>
// 2 pixels are converted in one loop
// NOTE: width * height must be a multiple of 2
asmTexConv_ARGB1555_ARGB4444(src, dst, size);
texConv_ARGB1555_ARGB4444(src, dst, size);
}
void TexConv_AI88_ARGB4444 (wxUIntPtr src, wxUIntPtr dst, int width, int height)
// Entry point for the AI88 -> ARGB4444 conversion (per the function name).
// src and dst are forwarded unchanged; the loop count handed to the converter
// is width * height halved, since two pixels are handled per iteration.
// NOTE(review): two converter calls with identical arguments appear back to
// back below - confirm the asmTexConv_* one is not a leftover from the old
// inline-asm version.
void TexConv_AI88_ARGB4444 (unsigned char * src, unsigned char * dst, int width, int height)
{
int size = (width * height) >> 1; // Hiroshi Morii <koolsmoky@users.sourceforge.net>
// 2 pixels are converted in one loop
// NOTE: width * height must be a multiple of 2
asmTexConv_AI88_ARGB4444(src, dst, size);
texConv_AI88_ARGB4444(src, dst, size);
}
void TexConv_AI44_ARGB4444 (wxUIntPtr src, wxUIntPtr dst, int width, int height)
// Entry point for the AI44 -> ARGB4444 conversion (per the function name).
// src and dst are forwarded unchanged; the loop count handed to the converter
// is width * height divided by four, since four pixels are handled per
// iteration.
// NOTE(review): two converter calls with identical arguments appear back to
// back below - confirm the asmTexConv_* one is not a leftover from the old
// inline-asm version.
void TexConv_AI44_ARGB4444 (unsigned char * src, unsigned char * dst, int width, int height)
{
int size = (width * height) >> 2; // Hiroshi Morii <koolsmoky@users.sourceforge.net>
// 4 pixels are converted in one loop
// NOTE: width * height must be a multiple of 4
asmTexConv_AI44_ARGB4444(src, dst, size);
texConv_AI44_ARGB4444(src, dst, size);
}
void TexConv_A8_ARGB4444 (wxUIntPtr src, wxUIntPtr dst, int width, int height)
// Entry point for the A8 -> ARGB4444 conversion (per the function name).
// src and dst are forwarded unchanged; the loop count handed to the converter
// is width * height divided by four, since four pixels are handled per
// iteration.
// NOTE(review): two converter calls with identical arguments appear back to
// back below - confirm the asmTexConv_* one is not a leftover from the old
// inline-asm version.
void TexConv_A8_ARGB4444 (unsigned char * src, unsigned char * dst, int width, int height)
{
int size = (width * height) >> 2; // Hiroshi Morii <koolsmoky@users.sourceforge.net>
// 4 pixels are converted in one loop
// NOTE: width * height must be a multiple of 4
asmTexConv_A8_ARGB4444(src, dst, size);
texConv_A8_ARGB4444(src, dst, size);
}