[Glide64] Remove asm from TexConv.h

2015-10-14 11:38:47 +11:00 · 2015-10-14 11:38:47 +11:00 · dd1c306d16
parent 3d52a163cd
commit dd1c306d16
2 changed files with 115 additions and 312 deletions
--- a/Source/Glide64/TexCache.cpp
+++ b/Source/Glide64/TexCache.cpp
@@ -149,56 +149,27 @@ void ClearCache ()
 }

 //****************************************************************
-// GetTexInfo - gets information for either t0 or t1, checks if in cache & fills tex_found
-
-extern "C" __declspec(naked) int asmTextureCRC(int addr, int width, int height, int line)
+uint32_t textureCRC(uint8_t *addr, int width, int height, int line)
 {
-	_asm {
-		align 4
-		push ebp
-		mov ebp, esp
+  uint32_t crc = 0;
+  uint32_t *pixelpos;
+  unsigned int i;
+  uint64_t twopixel_crc;

-        push ebx
-        push edi
+  pixelpos = (uint32_t*)addr;
+  for (; height; height--) {
+    for (i = width; i; --i) {
+      twopixel_crc = i * (uint64_t)(pixelpos[1] + pixelpos[0] + crc);
+      crc = (uint32_t) ((twopixel_crc >> 32) + twopixel_crc);
+      pixelpos += 2;
+    }
+    crc = ((unsigned int)height * (uint64_t)crc >> 32) + height * crc;
+    pixelpos = (uint32_t *)((char *)pixelpos + line);
+  }

-        xor eax,eax                             // eax is final result
-        mov ebx,[line]
-        mov ecx,[height]                // ecx is height counter
-        mov edi,[addr]                  // edi is ptr to texture memory
-crc_loop_y:
-        push ecx
-
-        mov ecx,[width]
-crc_loop_x:
-
-        add eax,[edi]           // MUST be 64-bit aligned, so manually unroll
-        add eax,[edi+4]
-        mov edx,ecx
-        mul edx
-        add eax,edx
-        add edi,8
-
-        dec ecx
-        jnz crc_loop_x
-
-        pop ecx
-
-        mov edx,ecx
-        mul edx
-        add eax,edx
-
-        add edi,ebx
-
-        dec ecx
-        jnz crc_loop_y
-
-        pop edi
-        pop ebx
-		mov esp, ebp
-		pop ebp
-		ret
-	}
+  return crc;
 }
+// GetTexInfo - gets information for either t0 or t1, checks if in cache & fills tex_found

 void GetTexInfo (int id, int tile)
 {
@@ -388,17 +359,17 @@ void GetTexInfo (int id, int tile)
  {
    line = (line - wid_64) << 3;
    if (wid_64 < 1) wid_64 = 1;
-    wxUIntPtr addr = wxPtrToUInt(rdp.tmem) + (rdp.tiles[tile].t_mem<<3);
+    uint8_t * addr = (((uint8_t*)rdp.tmem) + (rdp.tiles[tile].t_mem<<3));
    if (crc_height > 0) // Check the CRC
    {
      if (rdp.tiles[tile].size < 3)
-        crc = asmTextureCRC(addr, wid_64, crc_height, line);
+        crc = textureCRC(addr, wid_64, crc_height, line);
      else //32b texture
      {
        int line_2 = line >> 1;
        int wid_64_2 = max(1, wid_64 >> 1);
-        crc = asmTextureCRC(addr, wid_64_2, crc_height, line_2);
-        crc += asmTextureCRC(addr+0x800, wid_64_2, crc_height, line_2);
+        crc = textureCRC(addr, wid_64_2, crc_height, line_2);
+        crc += textureCRC(addr+0x800, wid_64_2, crc_height, line_2);
      }
    }
  }
@@ -1087,8 +1058,8 @@ void LoadTex (int id, int tmu)
  cache->f_mirror_t = FALSE;
  cache->f_wrap_s = FALSE;
  cache->f_wrap_t = FALSE;
-  cache->is_hires_tex = FALSE;
 #ifdef TEXTURE_FILTER
+  cache->is_hires_tex = FALSE;
  cache->ricecrc    = texinfo[id].ricecrc;
 #endif

@@ -1534,22 +1505,22 @@ void LoadTex (int id, int tmu)
      // Convert the texture to ARGB 4444
      if (LOWORD(result) == GR_TEXFMT_ARGB_1555)
      {
-        TexConv_ARGB1555_ARGB4444 (wxPtrToUInt(texture), wxPtrToUInt(tex2), real_x, real_y);
+        TexConv_ARGB1555_ARGB4444 ((texture), (tex2), real_x, real_y);
        texture = tex2;
      }
      else if (LOWORD(result) == GR_TEXFMT_ALPHA_INTENSITY_88)
      {
-        TexConv_AI88_ARGB4444 (wxPtrToUInt(texture), wxPtrToUInt(tex2), real_x, real_y);
+        TexConv_AI88_ARGB4444 ((texture), (tex2), real_x, real_y);
        texture = tex2;
      }
      else if (LOWORD(result) == GR_TEXFMT_ALPHA_INTENSITY_44)
      {
-        TexConv_AI44_ARGB4444 (wxPtrToUInt(texture), wxPtrToUInt(tex2), real_x, real_y);
+        TexConv_AI44_ARGB4444 ((texture), (tex2), real_x, real_y);
        texture = tex2;
      }
      else if (LOWORD(result) == GR_TEXFMT_ALPHA_8)
      {
-        TexConv_A8_ARGB4444 (wxPtrToUInt(texture), wxPtrToUInt(tex2), real_x, real_y);
+        TexConv_A8_ARGB4444 ((texture), (tex2), real_x, real_y);
        texture = tex2;
      }
      /*else if (LOWORD(result) == GR_TEXFMT_ARGB_4444)
--- a/Source/Glide64/TexConv.h
+++ b/Source/Glide64/TexConv.h
@@ -37,299 +37,131 @@
 //
 //****************************************************************

-extern "C" void  __declspec(naked) asmTexConv_ARGB1555_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int isize)
+static inline void texConv_ARGB1555_ARGB4444(uint8_t *src, uint8_t *dst, int size)
 {
-	_asm {
-		align 4
-		push ebp
-		mov ebp, esp
-        push ebx
-        push esi
-        push edi
+  uint32_t *v3;
+  uint32_t *v4;
+  int v5;
+  uint32_t v6;
+  uint32_t v7;

-        mov esi,[src]
-        mov edi,[dst]
-        mov ecx,[isize]
-
-tc1_loop:
-        mov eax,[esi]
-        add esi,4
-
-        // arrr rrgg gggb bbbb
-        // aaaa rrrr gggg bbbb
-        mov edx,eax
-        and eax,0x80008000
-        mov ebx,eax                             // ebx = 0xa000000000000000
-        shr eax,1
-        or ebx,eax                              // ebx = 0xaa00000000000000
-        shr eax,1
-        or ebx,eax                              // ebx = 0xaaa0000000000000
-        shr eax,1
-        or ebx,eax                              // ebx = 0xaaaa000000000000
-
-        mov eax,edx
-        and eax,0x78007800              // eax = 0x0rrrr00000000000
-        shr eax,3                               // eax = 0x0000rrrr00000000
-        or ebx,eax                              // ebx = 0xaaaarrrr00000000
-
-        mov eax,edx
-        and eax,0x03c003c0              // eax = 0x000000gggg000000
-        shr eax,2                               // eax = 0x00000000gggg0000
-        or ebx,eax                              // ebx = 0xaaaarrrrgggg0000
-
-        and edx,0x001e001e              // edx = 0x00000000000bbbb0
-        shr edx,1                               // edx = 0x000000000000bbbb
-        or ebx,edx                              // ebx = 0xaaaarrrrggggbbbb
-
-        mov [edi],ebx
-        add edi,4
-
-        dec ecx
-        jnz tc1_loop
-
-        pop edi
-        pop esi
-        pop ebx
-		mov esp, ebp
-		pop ebp
-		ret
-	}
+  v3 = (uint32_t *)src;
+  v4 = (uint32_t *)dst;
+  v5 = size;
+  do
+  {
+    v6 = *v3;
+    ++v3;
+    v7 = v6;
+    *v4 = ((v7 & 0x1E001E) >> 1) | ((v6 & 0x3C003C0) >> 2) | ((v6 & 0x78007800) >> 3) | ((v6 & 0x80008000) >> 3) | ((v6 & 0x80008000) >> 2) | ((v6 & 0x80008000) >> 1) | (v6 & 0x80008000);
+    ++v4;
+    --v5;
+  }
+  while ( v5 );
 }

-extern "C" void  __declspec(naked) asmTexConv_AI88_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int isize)
+static inline void texConv_AI88_ARGB4444(uint8_t *src, uint8_t *dst, int size)
 {
-	_asm {
-		align 4
-		push ebp
-		mov ebp, esp
-        push ebx
-        push esi
-        push edi
+  uint32_t *v3;
+  uint32_t *v4;
+  int v5;
+  uint32_t v6;
+  uint32_t v7;

-        mov esi,[src]
-        mov edi,[dst]
-        mov ecx,[isize]
-
-tc1_loop:
-        mov eax,[esi]
-        add esi,4
-
-        // aaaa aaaa iiii iiii
-        // aaaa rrrr gggg bbbb
-        mov edx,eax
-        and eax,0xF000F000              // eax = 0xaaaa000000000000
-        mov ebx,eax                             // ebx = 0xaaaa000000000000
-
-        and edx,0x00F000F0              // edx = 0x00000000iiii0000
-        shl edx,4                               // edx = 0x0000iiii00000000
-        or ebx,edx                              // ebx = 0xaaaaiiii00000000
-        shr edx,4                               // edx = 0x00000000iiii0000
-        or ebx,edx                              // ebx = 0xaaaaiiiiiiii0000
-        shr edx,4                               // edx = 0x000000000000iiii
-        or ebx,edx                              // ebx = 0xaaaaiiiiiiiiiiii
-
-        mov [edi],ebx
-        add edi,4
-
-        dec ecx
-        jnz tc1_loop
-
-        pop edi
-        pop esi
-        pop ebx
-		mov esp, ebp
-		pop ebp
-		ret
-	}
+  v3 = (uint32_t *)src;
+  v4 = (uint32_t *)dst;
+  v5 = size;
+  do
+  {
+    v6 = *v3;
+    ++v3;
+    v7 = v6;
+    *v4 = (16 * (v7 & 0xF000F0) >> 8) | (v7 & 0xF000F0) | (16 * (v7 & 0xF000F0)) | (v6 & 0xF000F000);
+    ++v4;
+    --v5;
+  }
+  while ( v5 );
 }

-extern "C" void  __declspec(naked) asmTexConv_AI44_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int isize)
+static inline void texConv_AI44_ARGB4444(uint8_t *src, uint8_t *dst, int size)
 {
-	_asm {
-		align 4
-		push ebp
-		mov ebp, esp
-        push ebx
-        push esi
-        push edi
+  uint32_t *v3;
+  uint32_t *v4;
+  int v5;
+  uint32_t v6;
+  uint32_t *v7;

-        mov esi,[src]
-        mov edi,[dst]
-        mov ecx,[isize]
-
-tc1_loop:
-        mov eax,[esi]
-        add esi,4
-
-        // aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0
-        // aaaa1 rrrr1 gggg1 bbbb1 aaaa0 rrrr0 gggg0 bbbb0
-        // aaaa3 rrrr3 gggg3 bbbb3 aaaa2 rrrr2 gggg2 bbbb2
-        mov edx,eax                             // eax = aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0
-        shl eax,16                              // eax = aaaa1 iiii1 aaaa0 iiii0 0000  0000  0000  0000
-        and eax,0xFF000000              // eax = aaaa1 iiii1 0000  0000  0000  0000  0000  0000
-        mov ebx,eax                             // ebx = aaaa1 iiii1 0000  0000  0000  0000  0000  0000
-        and eax,0x0F000000              // eax = 0000  iiii1 0000  0000  0000  0000  0000  0000
-        shr eax,4                               // eax = 0000  0000  iiii1 0000  0000  0000  0000  0000
-        or ebx,eax                              // ebx = aaaa1 iiii1 iiii1 0000  0000  0000  0000  0000
-        shr eax,4                               // eax = 0000  0000  0000  iiii1 0000  0000  0000  0000
-        or ebx,eax                              // ebx = aaaa1 iiii1 iiii1 iiii1 0000  0000  0000  0000
-
-        mov eax,edx                             // eax = aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0
-        shl eax,8                               // eax = aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0 0000  0000
-        and eax,0x0000FF00              // eax = 0000  0000  0000  0000  aaaa0 iiii0 0000  0000
-        or ebx,eax                              // ebx = aaaa1 iiii1 iiii1 iiii1 aaaa0 iiii0 0000  0000
-        and eax,0x00000F00              // eax = 0000  0000  0000  0000  0000  iiii0 0000  0000
-        shr eax,4                               // eax = 0000  0000  0000  0000  0000  0000  iiii0 0000
-        or ebx,eax                              // ebx = aaaa1 iiii1 iiii1 iiii1 aaaa0 iiii0 iiii0 0000
-        shr eax,4                               // eax = 0000  0000  0000  0000  0000  0000  0000  iiii0
-        or ebx,eax                              // ebx = aaaa1 iiii1 iiii1 iiii1 aaaa0 iiii0 iiii0 iiii0
-
-        mov [edi],ebx
-        add edi,4
-
-        mov eax,edx                             // eax = aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0
-        and eax,0xFF000000              // eax = aaaa3 iiii3 0000  0000  0000  0000  0000  0000
-        mov ebx,eax                             // ebx = aaaa3 iiii3 0000  0000  0000  0000  0000  0000
-        and eax,0x0F000000              // eax = 0000  iiii3 0000  0000  0000  0000  0000  0000
-        shr eax,4                               // eax = 0000  0000  iiii3 0000  0000  0000  0000  0000
-        or ebx,eax                              // ebx = aaaa3 iiii3 iiii3 0000  0000  0000  0000  0000
-        shr eax,4                               // eax = 0000  0000  0000  iiii3 0000  0000  0000  0000
-        or ebx,eax                              // ebx = aaaa3 iiii3 iiii3 iiii3 0000  0000  0000  0000
-
-                                                        // edx = aaaa3 iiii3 aaaa2 iiii2 aaaa1 iiii1 aaaa0 iiii0
-        shr edx,8                               // edx = 0000  0000  aaaa3 aaaa3 aaaa2 iiii2 aaaa1 iiii1
-        and edx,0x0000FF00              // edx = 0000  0000  0000  0000  aaaa2 iiii2 0000  0000
-        or ebx,edx                              // ebx = aaaa3 iiii3 iiii3 iiii3 aaaa2 iiii2 0000  0000
-        and edx,0x00000F00              // edx = 0000  0000  0000  0000  0000  iiii2 0000  0000
-        shr edx,4                               // edx = 0000  0000  0000  0000  0000  0000  iiii2 0000
-        or ebx,edx                              // ebx = aaaa3 iiii3 iiii3 iiii3 aaaa2 iiii2 iiii2 0000
-        shr edx,4                               // edx = 0000  0000  0000  0000  0000  0000  0000  iiii2
-        or ebx,edx                              // ebx = aaaa3 iiii3 iiii3 iiii3 aaaa2 iiii2 iiii2 iiii2
-
-        mov [edi],ebx
-        add edi,4
-
-        dec ecx
-        jnz tc1_loop
-
-        pop edi
-        pop esi
-        pop ebx
-		mov esp, ebp
-		pop ebp
-		ret
-	}
+  v3 = (uint32_t *)src;
+  v4 = (uint32_t *)dst;
+  v5 = size;
+  do
+  {
+    v6 = *v3;
+    ++v3;
+    *v4 = ((((uint16_t)v6 << 8) & 0xFF00 & 0xF00u) >> 8) | ((((uint16_t)v6 << 8) & 0xFF00 & 0xF00u) >> 4) | (uint16_t)(((uint16_t)v6 << 8) & 0xFF00) | (((v6 << 16) & 0xF000000) >> 8) | (((v6 << 16) & 0xF000000) >> 4) | ((v6 << 16) & 0xFF000000);
+    v7 = v4 + 1;
+    *v7 = (((v6 >> 8) & 0xF00) >> 8) | (((v6 >> 8) & 0xF00) >> 4) | ((v6 >> 8) & 0xFF00) | ((v6 & 0xF000000) >> 8) | ((v6 & 0xF000000) >> 4) | (v6 & 0xFF000000);
+    v4 = v7 + 1;
+    --v5;
+  }
+  while ( v5 );
 }

-extern "C" void  __declspec(naked) asmTexConv_A8_ARGB4444(wxUIntPtr src, wxUIntPtr dst, int isize)
+static inline void texConv_A8_ARGB4444(uint8_t *src, uint8_t *dst, int size)
 {
-	_asm {
-		align 4
-		push ebp
-		mov ebp, esp
-        push ebx
-        push esi
-        push edi
+  uint32_t *v3;
+  uint32_t *v4;
+  int v5;
+  uint32_t v6;
+  uint32_t v7;
+  uint32_t *v8;

-        mov esi,[src]
-        mov edi,[dst]
-        mov ecx,[isize]
-
-tc1_loop:
-        mov eax,[esi]
-        add esi,4
-
-        // aaaa3 aaaa3 aaaa2 aaaa2 aaaa1 aaaa1 aaaa0 aaaa0
-        // aaaa1 rrrr1 gggg1 bbbb1 aaaa0 rrrr0 gggg0 bbbb0
-        // aaaa3 rrrr3 gggg3 bbbb3 aaaa2 rrrr2 gggg2 bbbb2
-        mov edx,eax
-        and eax,0x0000F000              // eax = 00 00 00 00 a1 00 00 00
-        shl eax,16                              // eax = a1 00 00 00 00 00 00 00
-        mov ebx,eax                             // ebx = a1 00 00 00 00 00 00 00
-        shr eax,4
-        or ebx,eax                              // ebx = a1 a1 00 00 00 00 00 00
-        shr eax,4
-        or ebx,eax                              // ebx = a1 a1 a1 00 00 00 00 00
-        shr eax,4
-        or ebx,eax                              // ebx = a1 a1 a1 a1 00 00 00 00
-
-        mov eax,edx
-        and eax,0x000000F0              // eax = 00 00 00 00 00 00 a0 00
-        shl eax,8                               // eax = 00 00 00 00 a0 00 00 00
-        or ebx,eax
-        shr eax,4
-        or ebx,eax
-        shr eax,4
-        or ebx,eax
-        shr eax,4
-        or ebx,eax                              // ebx = a1 a1 a1 a1 a0 a0 a0 a0
-
-        mov [edi],ebx
-        add edi,4
-
-        mov eax,edx                             // eax = a3 a3 a2 a2 a1 a1 a0 a0
-        and eax,0xF0000000              // eax = a3 00 00 00 00 00 00 00
-        mov ebx,eax                             // ebx = a3 00 00 00 00 00 00 00
-        shr eax,4
-        or ebx,eax                              // ebx = a3 a3 00 00 00 00 00 00
-        shr eax,4
-        or ebx,eax                              // ebx = a3 a3 a3 00 00 00 00 00
-        shr eax,4
-        or ebx,eax                              // ebx = a3 a3 a3 a3 00 00 00 00
-
-        and edx,0x00F00000              // eax = 00 00 a2 00 00 00 00 00
-        shr edx,8                               // eax = 00 00 00 00 a2 00 00 00
-        or ebx,edx
-        shr edx,4
-        or ebx,edx
-        shr edx,4
-        or ebx,edx
-        shr edx,4
-        or ebx,edx                              // ebx = a3 a3 a3 a3 a2 a2 a2 a2
-
-        mov [edi],ebx
-        add edi,4
-
-        dec ecx
-        jnz tc1_loop
-
-        pop edi
-        pop esi
-        pop ebx
-		mov esp, ebp
-		pop ebp
-		ret
-	}
+  v3 = (uint32_t *)src;
+  v4 = (uint32_t *)dst;
+  v5 = size;
+  do
+  {
+    v6 = *v3;
+    ++v3;
+    v7 = v6;
+    *v4 = ((v6 & 0xF0) << 8 >> 12) | (uint8_t)(v6 & 0xF0) | (16 * (uint8_t)(v6 & 0xF0) & 0xFFFFFFF) | ((uint8_t)(v6 & 0xF0) << 8) | (16 * (uint16_t)(v6 & 0xF000) & 0xFFFFF) | (((uint16_t)(v6 & 0xF000) << 8) & 0xFFFFFF) | (((uint16_t)(v6 & 0xF000) << 12) & 0xFFFFFFF) | ((uint16_t)(v6 & 0xF000) << 16);
+    v8 = v4 + 1;
+    *v8 = ((v7 & 0xF00000) >> 20) | ((v7 & 0xF00000) >> 16) | ((v7 & 0xF00000) >> 12) | ((v7 & 0xF00000) >> 8) | ((v6 & 0xF0000000) >> 12) | ((v6 & 0xF0000000) >> 8) | ((v6 & 0xF0000000) >> 4) | (v6 & 0xF0000000);
+    v4 = v8 + 1;
+    --v5;
+  }
+  while ( v5 );
 }

-void TexConv_ARGB1555_ARGB4444 (wxUIntPtr src, wxUIntPtr dst, int width, int height)
+void TexConv_ARGB1555_ARGB4444 (unsigned char * src, unsigned char * dst, int width, int height)
 {
  int size = (width * height) >> 1;	// Hiroshi Morii <koolsmoky@users.sourceforge.net>
  // 2 pixels are converted in one loop
  // NOTE: width * height must be a multiple of 2
-  asmTexConv_ARGB1555_ARGB4444(src, dst, size);
+  texConv_ARGB1555_ARGB4444(src, dst, size);
 }

-void TexConv_AI88_ARGB4444 (wxUIntPtr src, wxUIntPtr dst, int width, int height)
+void TexConv_AI88_ARGB4444 (unsigned char * src, unsigned char * dst, int width, int height)
 {
  int size = (width * height) >> 1;	// Hiroshi Morii <koolsmoky@users.sourceforge.net>
  // 2 pixels are converted in one loop
  // NOTE: width * height must be a multiple of 2
-  asmTexConv_AI88_ARGB4444(src, dst, size);
+  texConv_AI88_ARGB4444(src, dst, size);
 }

-void TexConv_AI44_ARGB4444 (wxUIntPtr src, wxUIntPtr dst, int width, int height)
+void TexConv_AI44_ARGB4444 (unsigned char * src, unsigned char * dst, int width, int height)
 {
  int size = (width * height) >> 2;	// Hiroshi Morii <koolsmoky@users.sourceforge.net>
  // 4 pixels are converted in one loop
  // NOTE: width * height must be a multiple of 4
-  asmTexConv_AI44_ARGB4444(src, dst, size);
+  texConv_AI44_ARGB4444(src, dst, size);
 }

-void TexConv_A8_ARGB4444 (wxUIntPtr src, wxUIntPtr dst, int width, int height)
+void TexConv_A8_ARGB4444 (unsigned char * src, unsigned char * dst, int width, int height)
 {
  int size = (width * height) >> 2;	// Hiroshi Morii <koolsmoky@users.sourceforge.net>
  // 4 pixels are converted in one loop
  // NOTE: width * height must be a multiple of 4
-  asmTexConv_A8_ARGB4444(src, dst, size);
+  texConv_A8_ARGB4444(src, dst, size);
 }
+