Don't use lookup tables. It's better to use CPU registers and reduce memory accesses.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@3909 8ced0084-cf51-0410-be5f-012b33b47a6e
2009-07-30 20:29:52 +00:00 · 2009-07-30 20:29:52 +00:00 · 8ab4814d73
parent dcae5938c9
commit 8ab4814d73
8 changed files with 58 additions and 108 deletions
--- a/Source/Core/VideoCommon/Src/LookUpTables.cpp
+++ b/Source/Core/VideoCommon/Src/LookUpTables.cpp
@ -1,47 +0,0 @@
-// Copyright (C) 2003 Dolphin Project.
-
-// This program is free software: you can redistribute it and/or modify
-// it under the terms of the GNU General Public License as published by
-// the Free Software Foundation, version 2.0.
-
-// This program is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License 2.0 for more details.
-
-// A copy of the GPL 2.0 should have been included with the program.
-// If not, see http://www.gnu.org/licenses/
-
-// Official SVN repository and contact information can be found at
-// http://code.google.com/p/dolphin-emu/
-
-#include "LookUpTables.h"
-
-const int lut3to8[] = { 0x00,0x24,0x48,0x6D,0x91,0xB6,0xDA,0xFF};
-const int lut4to8[] = { 0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77,
-                        0x88,0x99,0xAA,0xBB,0xCC,0xDD,0xEE,0xFF};
-const int lut5to8[] = { 0x00,0x08,0x10,0x18,0x20,0x29,0x31,0x39,
-                        0x41,0x4A,0x52,0x5A,0x62,0x6A,0x73,0x7B,
-                        0x83,0x8B,0x94,0x9C,0xA4,0xAC,0xB4,0xBD,
-                        0xC5,0xCD,0xD5,0xDE,0xE6,0xEE,0xF6,0xFF};
-int lut6to8[64];
-float lutu8tosfloat[256];
-float lutu8toufloat[256];
-float luts8tosfloat[256];
-float shiftLookup[32];
-
-void InitLUTs()
-{
-	for (int i = 0; i < 32; i++)
-		shiftLookup[i] = 1.0f / float(1 << i);
-
-	for (int i = 0; i < 64; i++)
-		lut6to8[i] = (i*255) / 63;
-
-	for (int i = 0; i < 256; i++)
-	{
-		lutu8tosfloat[i] = (float)(i - 128) / 127.0f;
-		lutu8toufloat[i] = (float)(i) / 255.0f;
-		luts8tosfloat[i] = ((float)(signed char)(char)i) / 127.0f;
-	}
-}
--- a/Source/Core/VideoCommon/Src/LookUpTables.h
+++ b/Source/Core/VideoCommon/Src/LookUpTables.h
@ -20,15 +20,28 @@

 #include "Common.h"

-extern const int lut3to8[8];
-extern const int lut4to8[16];
-extern const int lut5to8[32];
-extern int lut6to8[64];
-extern float lutu8tosfloat[256];
-extern float lutu8toufloat[256];
-extern float luts8tosfloat[256];
-extern float shiftLookup[32];
+inline u8 Convert3To8(u8 v)
+{
+	// Widen a 3-bit value to 8 bits by repeating its bit pattern (00000123 -> 12312312), so 0x7 becomes 0xFF; v is not masked here.
+	return (v << 5) | (v << 2) | (v >> 1);
+}

-void InitLUTs();
+inline u8 Convert4To8(u8 v)
+{
+	// Widen a 4-bit value to 8 bits by copying the nibble into both halves (00001234 -> 12341234), so 0xF becomes 0xFF; v is not masked here.
+	return (v << 4) | v;
+}
+
+inline u8 Convert5To8(u8 v)
+{
+	// Widen a 5-bit value to 8 bits: the value fills the top 5 bits and its top 3 bits fill the rest (00012345 -> 12345123); v is not masked here.
+	return (v << 3) | (v >> 2);
+}
+
+inline u8 Convert6To8(u8 v)
+{
+	// Widen a 6-bit value to 8 bits: the value fills the top 6 bits and its top 2 bits fill the rest (00123456 -> 12345612); v is not masked here.
+	return (v << 2) | (v >> 4);
+}

 #endif // _LOOKUPTABLES_H
--- a/Source/Core/VideoCommon/Src/TextureDecoder.cpp
+++ b/Source/Core/VideoCommon/Src/TextureDecoder.cpp
@ -150,16 +150,6 @@ int TexDecoder_GetPaletteSize(int format)
    }
 }

-inline u32 decode565(u16 val)
-{
-    int r,g,b,a;
-    r=lut5to8[(val>>11) & 0x1f];
-    g=lut6to8[(val>>5 ) & 0x3f];
-    b=lut5to8[(val    ) & 0x1f];
-    a=0xFF;
-    return (a << 24) | (r << 16) | (g << 8) | b;
-}
-
 inline u32 decodeIA8(u16 val)
 {
    int a = val >> 8;
@ -172,17 +162,17 @@ inline u32 decode5A3(u16 val)
    int r,g,b,a;
    if ((val & 0x8000))
    {
-        r=lut5to8[(val >> 10) & 0x1f];
-        g=lut5to8[(val >> 5 ) & 0x1f];
-        b=lut5to8[(val      ) & 0x1f];
-        a=0xFF;
+        a = 0xFF;
+		r = Convert5To8((val >> 10) & 0x1F);
+		g = Convert5To8((val >> 5) & 0x1F);
+		b = Convert5To8(val & 0x1F);
 	}
    else
    {
-        a=lut3to8[(val >> 12) & 0x7];
-        r=lut4to8[(val >> 8 ) & 0xf];
-        g=lut4to8[(val >> 4 ) & 0xf];
-        b=lut4to8[(val      ) & 0xf];
+		a = Convert3To8((val >> 12) & 0x7);
+		r = Convert4To8((val >> 8) & 0xF);
+		g = Convert4To8((val >> 4) & 0xF);
+		b = Convert4To8(val & 0xF);
    }
    return (a << 24) | (r << 16) | (g << 8) | b;
 }
@ -266,8 +256,8 @@ inline void decodebytesIA4(u16 *dst, const u8 *src)
 	for (int x = 0; x < 8; x++)
 	{
 		const u8 val = src[x];
-        const u8 a = lut4to8[val >> 4];
-        const u8 l = lut4to8[val & 0xF];
+		u8 a = Convert4To8(val >> 4);
+		u8 l = Convert4To8(val & 0xF);
 		dst[x] = (a << 8) | l;
 	}
 }
@ -305,12 +295,12 @@ void decodeDXTBlock(u32 *dst, const DXTBlock *src, int pitch)
 	// S3TC Decoder (Note: GCN decodes differently from PC)
    u16 c1 = Common::swap16(src->color1);
    u16 c2 = Common::swap16(src->color2);
-    int blue1 = lut5to8[c1 & 0x1F];
-    int blue2 = lut5to8[c2 & 0x1F];
-    int green1 = lut6to8[(c1 >> 5) & 0x3F];
-    int green2 = lut6to8[(c2 >> 5) & 0x3F];
-    int red1 = lut5to8[(c1 >> 11) & 0x1F];
-    int red2 = lut5to8[(c2 >> 11) & 0x1F];
+	u8 blue1 = Convert5To8(c1 & 0x1F);
+	u8 blue2 = Convert5To8(c2 & 0x1F);
+	u8 green1 = Convert6To8((c1 >> 5) & 0x3F);
+	u8 green2 = Convert6To8((c2 >> 5) & 0x3F);
+	u8 red1 = Convert5To8((c1 >> 11) & 0x1F);
+	u8 red2 = Convert5To8((c2 >> 11) & 0x1F);
    
    int colors[4];

@ -402,8 +392,8 @@ PC_TexFormat TexDecoder_Decode_real(u8 *dst, const u8 *src, int width, int heigh
 						for (int ix = 0; ix < 4; ix++)
 						{
 							int val = src[ix];
-							dst[(y + iy) * width + x + ix * 2] = lut4to8[val >> 4];
-							dst[(y + iy) * width + x + ix * 2 + 1] = lut4to8[val & 15];
+							dst[(y + iy) * width + x + ix * 2] = Convert4To8(val >> 4);
+							dst[(y + iy) * width + x + ix * 2 + 1] = Convert4To8(val & 0xF);
 						}
        }
       return PC_TEX_FMT_I4_AS_I8;
--- a/Source/Core/VideoCommon/Src/VertexLoader.cpp
+++ b/Source/Core/VideoCommon/Src/VertexLoader.cpp
@ -619,10 +619,10 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
 	m_VtxAttr.texCoord[7].Frac		= g_VtxAttr[vtx_attr_group].g2.Tex7Frac;

 	pVtxAttr = &m_VtxAttr;
-	posScale = shiftLookup[m_VtxAttr.PosFrac];
+	posScale = 1.0f / float(1 << m_VtxAttr.PosFrac);
 	if (m_NativeFmt->m_components & VB_HAS_UVALL)
 		for (int i = 0; i < 8; i++)
-			tcScale[i] = shiftLookup[m_VtxAttr.texCoord[i].Frac];
+			tcScale[i] = 1.0f / float(1 << m_VtxAttr.texCoord[i].Frac);
 	for (int i = 0; i < 2; i++)
 		colElements[i] = m_VtxAttr.color[i].Elements;

--- a/Source/Core/VideoCommon/Src/VertexLoader_Color.cpp
+++ b/Source/Core/VideoCommon/Src/VertexLoader_Color.cpp
@ -42,28 +42,28 @@ inline void _SetCol(u32 val)

 void _SetCol4444(u16 val)
 {
-	u32 col = lut4to8[(val>>0)&0xF]<<ASHIFT;
-	col    |= lut4to8[(val>>12)&0xF]   <<RSHIFT;
-	col    |= lut4to8[(val>>8)&0xF]    <<GSHIFT;
-	col    |= lut4to8[(val>>4)&0xF]    <<BSHIFT;
+	u32 col = Convert4To8(val & 0xF) << ASHIFT;
+	col |= Convert4To8((val >> 12) & 0xF) << RSHIFT;
+	col |= Convert4To8((val >> 8) & 0xF) << GSHIFT;
+	col |= Convert4To8((val >> 4) & 0xF) << BSHIFT;
 	_SetCol(col);
 }

 void _SetCol6666(u32 val)
 {
-	u32 col = lut6to8[(val>>18)&0x3F] << RSHIFT;
-	col    |= lut6to8[(val>>12)&0x3F] << GSHIFT;
-	col    |= lut6to8[(val>>6)&0x3F]  << BSHIFT;
-	col    |= lut6to8[(val>>0)&0x3F]  << ASHIFT;
+	u32 col = Convert6To8((val >> 18) & 0x3F) << RSHIFT;
+	col |= Convert6To8((val >> 12) & 0x3F) << GSHIFT;
+	col |= Convert6To8((val >> 6) & 0x3F) << BSHIFT;
+	col |= Convert6To8(val & 0x3F) << ASHIFT;
 	_SetCol(col);
 }

 void _SetCol565(u16 val)
 {
-	u32 col = lut5to8[(val>>11)&0x1f] << RSHIFT;
-	col     |= lut6to8[(val>>5 )&0x3f] << GSHIFT;
-	col     |= lut5to8[(val    )&0x1f] << BSHIFT;
-	_SetCol(col | (0xFF<<ASHIFT));
+	u32 col = Convert5To8((val >> 11) & 0x1F) << RSHIFT;
+	col |= Convert6To8((val >> 5) & 0x3F) << GSHIFT;
+	col |= Convert5To8(val & 0x1F) << BSHIFT;
+	_SetCol(col | (0xFF << ASHIFT));
 }
 //////////////////////////////////////////////////////////////////////////
 //////////////////////////////////////////////////////////////////////////
--- a/Source/Core/VideoCommon/VideoCommon.vcproj
+++ b/Source/Core/VideoCommon/VideoCommon.vcproj
@ -528,10 +528,6 @@
 				RelativePath=".\Src\IndexGenerator.h"
 				>
 			</File>
-			<File
-				RelativePath=".\Src\LookUpTables.cpp"
-				>
-			</File>
 			<File
 				RelativePath=".\Src\LookUpTables.h"
 				>
--- a/Source/Plugins/Plugin_VideoDX9/Src/main.cpp
+++ b/Source/Plugins/Plugin_VideoDX9/Src/main.cpp
@ -137,7 +137,6 @@ bool Init()
 			MessageBox(GetActiveWindow(), "Unable to initialize Direct3D. Please make sure that you have DirectX 9.0c correctly installed.", "Fatal Error", MB_OK);
 			return false;
 		}
-		InitLUTs();
 		InitXFBConvTables();
 	}
 	initCount++;
--- a/Source/Plugins/Plugin_VideoOGL/Src/main.cpp
+++ b/Source/Plugins/Plugin_VideoOGL/Src/main.cpp
@ -314,7 +314,6 @@ void Initialize(void *init)
    frameCount = 0;
    SVideoInitialize *_pVideoInitialize = (SVideoInitialize*)init;
    g_VideoInitialize = *(_pVideoInitialize); // Create a shortcut to _pVideoInitialize that can also update it
-    InitLUTs();
 	InitXFBConvTables();
    g_Config.Load();