Don't use lookup tables. It's better to use CPU registers and reduce memory accesses.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@3909 8ced0084-cf51-0410-be5f-012b33b47a6e
2009-07-30 20:29:52 +00:00 · 2009-07-30 20:29:52 +00:00 · 8ab4814d73
parent dcae5938c9
commit 8ab4814d73
8 changed files with 58 additions and 108 deletions
--- a/Source/Core/VideoCommon/Src/LookUpTables.cpp
+++ b/Source/Core/VideoCommon/Src/LookUpTables.cpp
@ -1,47 +0,0 @@
 // Copyright (C) 2003 Dolphin Project.
 // This program is free software: you can redistribute it and/or modify
 // it under the terms of the GNU General Public License as published by
 // the Free Software Foundation, version 2.0.
 // This program is distributed in the hope that it will be useful,
 // but WITHOUT ANY WARRANTY; without even the implied warranty of
 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 // GNU General Public License 2.0 for more details.
 // A copy of the GPL 2.0 should have been included with the program.
 // If not, see http://www.gnu.org/licenses/
 // Official SVN repository and contact information can be found at
 // http://code.google.com/p/dolphin-emu/
 #include "LookUpTables.h"
 // Constant expansion tables from an n-bit channel value to an 8-bit one.
 // Entry i of each table equals (i * 255) / (2^n - 1) with the fraction
 // truncated, so index 0 maps to 0x00 and the last index maps to 0xFF.
 // 3-bit -> 8-bit (8 entries).
 const int lut3to8[] = { 0x00,0x24,0x48,0x6D,0x91,0xB6,0xDA,0xFF};
 // 4-bit -> 8-bit (16 entries); every entry is exactly i * 0x11.
 const int lut4to8[] = { 0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77,
                         0x88,0x99,0xAA,0xBB,0xCC,0xDD,0xEE,0xFF};
 // 5-bit -> 8-bit (32 entries).
 const int lut5to8[] = { 0x00,0x08,0x10,0x18,0x20,0x29,0x31,0x39,
                         0x41,0x4A,0x52,0x5A,0x62,0x6A,0x73,0x7B,
                         0x83,0x8B,0x94,0x9C,0xA4,0xAC,0xB4,0xBD,
                         0xC5,0xCD,0xD5,0xDE,0xE6,0xEE,0xF6,0xFF};
 // Runtime-filled tables. They are zero-initialized globals until InitLUTs()
 // has been called; InitLUTs() is the only code here that writes them.
 int lut6to8[64];           // 6-bit -> 8-bit: (i * 255) / 63, truncated
 float lutu8tosfloat[256];  // byte i -> (i - 128) / 127
 float lutu8toufloat[256];  // byte i -> i / 255
 float luts8tosfloat[256];  // byte i reinterpreted as signed char, then / 127
 float shiftLookup[32];     // shiftLookup[i] == 1 / 2^i
 // Populates every runtime table declared above. Takes no arguments and
 // returns nothing; calling it again simply rewrites the same values.
 void InitLUTs()
 {
 	for (int i = 0; i < 32; i++)
 	{
 		// Shift an unsigned 1: with a signed int, `1 << 31` lands on the
 		// sign bit, float() of it is negative, and shiftLookup[31] would
 		// come out as a negative scale instead of +2^-31.
 		shiftLookup[i] = 1.0f / float(1u << i);
 	}
 	for (int i = 0; i < 64; i++)
 	{
 		// Integer division: the fractional part is dropped on purpose.
 		lut6to8[i] = (i * 255) / 63;
 	}
 	for (int i = 0; i < 256; i++)
 	{
 		// Unsigned byte biased by 128 and scaled; 128 maps to 0.0.
 		lutu8tosfloat[i] = (float)(i - 128) / 127.0f;
 		// Unsigned byte scaled so that 255 maps to 1.0.
 		lutu8toufloat[i] = (float)(i) / 255.0f;
 		// Same bit pattern read as a signed char, so 127 maps to 1.0.
 		luts8tosfloat[i] = ((float)(signed char)(char)i) / 127.0f;
 	}
 }
--- a/Source/Core/VideoCommon/Src/LookUpTables.h
+++ b/Source/Core/VideoCommon/Src/LookUpTables.h
@ -20,15 +20,28 @@
 #include "Common.h"
-extern const int lut3to8[8];
+inline u8 Convert3To8(u8 v)
-extern const int lut4to8[16];
+{
-extern const int lut5to8[32];
+	// Swizzle bits: 00000123 -> 12312312
-extern int lut6to8[64];
+	return (v << 5) | (v << 2) | (v >> 1);
-extern float lutu8tosfloat[256];
+}
 extern float lutu8toufloat[256];
 extern float luts8tosfloat[256];
 extern float shiftLookup[32];
-void InitLUTs();
+inline u8 Convert4To8(u8 v)
 {
 	// Copies the low nibble of v into the high nibble as well. For v in
 	// 0..15 the result is v * 0x11 (0x0 -> 0x00, 0xF -> 0xFF). v is not
 	// masked here, so any bits above the low nibble are OR-ed into the result.
 	// Swizzle bits: 00001234 -> 12341234
 	return (v << 4) | v;
 }
 inline u8 Convert5To8(u8 v)
 {
 	// Bit replication, 00012345 -> 12345123: the five input bits fill the
 	// top of the byte and their three most significant bits are repeated
 	// underneath. v is not masked; the int result is narrowed back to u8.
 	const int upperPart = v << 3;
 	const int lowerPart = v >> 2;
 	return upperPart | lowerPart;
 }
 inline u8 Convert6To8(u8 v)
 {
 	// Bit replication, 00123456 -> 12345612: the six input bits fill the
 	// top of the byte and their two most significant bits are repeated
 	// underneath. v is not masked; the int result is narrowed back to u8.
 	const int upperPart = v << 2;
 	const int lowerPart = v >> 4;
 	return upperPart | lowerPart;
 }
 #endif // _LOOKUPTABLES_H
--- a/Source/Core/VideoCommon/Src/TextureDecoder.cpp
+++ b/Source/Core/VideoCommon/Src/TextureDecoder.cpp
@ -150,16 +150,6 @@ int TexDecoder_GetPaletteSize(int format)
    }
 }
 inline u32 decode565(u16 val)
 {
    // Splits a 16-bit 5:6:5 pixel into its fields (bits 15..11, 10..5, 4..0),
    // widens each one to 8 bits through the lookup tables, and packs them
    // with 0xFF in bits 31..24, followed by the 15..11 field, the 10..5
    // field and the 4..0 field in descending byte order.
    const int red   = lut5to8[(val >> 11) & 0x1f];
    const int green = lut6to8[(val >> 5) & 0x3f];
    const int blue  = lut5to8[val & 0x1f];
    const int alpha = 0xFF;
    return (alpha << 24) | (red << 16) | (green << 8) | blue;
 }
 inline u32 decodeIA8(u16 val)
 {
    int a = val >> 8;
@ -172,17 +162,17 @@ inline u32 decode5A3(u16 val)
    int r,g,b,a;
    if ((val & 0x8000))
    {
-        r=lut5to8[(val >> 10) & 0x1f];
+        a = 0xFF;
-        g=lut5to8[(val >> 5 ) & 0x1f];
+		r = Convert5To8((val >> 10) & 0x1F);
-        b=lut5to8[(val      ) & 0x1f];
+		g = Convert5To8((val >> 5) & 0x1F);
-        a=0xFF;
+		b = Convert5To8(val & 0x1F);
 	}
    else
    {
-        a=lut3to8[(val >> 12) & 0x7];
+		a = Convert3To8((val >> 12) & 0x7);
-        r=lut4to8[(val >> 8 ) & 0xf];
+		r = Convert4To8((val >> 8) & 0xF);
-        g=lut4to8[(val >> 4 ) & 0xf];
+		g = Convert4To8((val >> 4) & 0xF);
-        b=lut4to8[(val      ) & 0xf];
+		b = Convert4To8(val & 0xF);
    }
    return (a << 24) | (r << 16) | (g << 8) | b;
 }
@ -263,13 +253,13 @@ inline void decodebytesC14X2_To_Raw16(u16* dst, const u16* src, int tlutaddr)
 //inline void decodebytesIA4(u16 *dst, const u8 *src, int numbytes)
 inline void decodebytesIA4(u16 *dst, const u8 *src)
 {
-    for (int x = 0; x < 8; x++)
+	for (int x = 0; x < 8; x++)
-    {
+	{
 		const u8 val = src[x];
-        const u8 a = lut4to8[val >> 4];
+		u8 a = Convert4To8(val >> 4);
-        const u8 l = lut4to8[val & 0xF];
+		u8 l = Convert4To8(val & 0xF);
-        dst[x] = (a << 8) | l;
+		dst[x] = (a << 8) | l;
-    }
+	}
 }
 //inline void decodebytesRGB5A3(u32 *dst, const u16 *src, int numpixels)
@ -305,12 +295,12 @@ void decodeDXTBlock(u32 *dst, const DXTBlock *src, int pitch)
 	// S3TC Decoder (Note: GCN decodes differently from PC)
    u16 c1 = Common::swap16(src->color1);
    u16 c2 = Common::swap16(src->color2);
-    int blue1 = lut5to8[c1 & 0x1F];
+	u8 blue1 = Convert5To8(c1 & 0x1F);
-    int blue2 = lut5to8[c2 & 0x1F];
+	u8 blue2 = Convert5To8(c2 & 0x1F);
-    int green1 = lut6to8[(c1 >> 5) & 0x3F];
+	u8 green1 = Convert6To8((c1 >> 5) & 0x3F);
-    int green2 = lut6to8[(c2 >> 5) & 0x3F];
+	u8 green2 = Convert6To8((c2 >> 5) & 0x3F);
-    int red1 = lut5to8[(c1 >> 11) & 0x1F];
+	u8 red1 = Convert5To8((c1 >> 11) & 0x1F);
-    int red2 = lut5to8[(c2 >> 11) & 0x1F];
+	u8 red2 = Convert5To8((c2 >> 11) & 0x1F);
    int colors[4];
@ -402,8 +392,8 @@ PC_TexFormat TexDecoder_Decode_real(u8 *dst, const u8 *src, int width, int heigh
 						for (int ix = 0; ix < 4; ix++)
 						{
 							int val = src[ix];
-							dst[(y + iy) * width + x + ix * 2] = lut4to8[val >> 4];
+							dst[(y + iy) * width + x + ix * 2] = Convert4To8(val >> 4);
-							dst[(y + iy) * width + x + ix * 2 + 1] = lut4to8[val & 15];
+							dst[(y + iy) * width + x + ix * 2 + 1] = Convert4To8(val & 0xF);
 						}
        }
       return PC_TEX_FMT_I4_AS_I8;
--- a/Source/Core/VideoCommon/Src/VertexLoader.cpp
+++ b/Source/Core/VideoCommon/Src/VertexLoader.cpp
@ -619,10 +619,10 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
 	m_VtxAttr.texCoord[7].Frac		= g_VtxAttr[vtx_attr_group].g2.Tex7Frac;
 	pVtxAttr = &m_VtxAttr;
-	posScale = shiftLookup[m_VtxAttr.PosFrac];
+	posScale = 1.0f / float(1 << m_VtxAttr.PosFrac);
 	if (m_NativeFmt->m_components & VB_HAS_UVALL)
 		for (int i = 0; i < 8; i++)
-			tcScale[i] = shiftLookup[m_VtxAttr.texCoord[i].Frac];
+			tcScale[i] = 1.0f / float(1 << m_VtxAttr.texCoord[i].Frac);
 	for (int i = 0; i < 2; i++)
 		colElements[i] = m_VtxAttr.color[i].Elements;
--- a/Source/Core/VideoCommon/Src/VertexLoader_Color.cpp
+++ b/Source/Core/VideoCommon/Src/VertexLoader_Color.cpp
@ -42,28 +42,28 @@ inline void _SetCol(u32 val)
 void _SetCol4444(u16 val)
 {
-	u32 col = lut4to8[(val>>0)&0xF]<<ASHIFT;
+	u32 col = Convert4To8(val & 0xF) << ASHIFT;
-	col    |= lut4to8[(val>>12)&0xF]   <<RSHIFT;
+	col |= Convert4To8((val >> 12) & 0xF) << RSHIFT;
-	col    |= lut4to8[(val>>8)&0xF]    <<GSHIFT;
+	col |= Convert4To8((val >> 8) & 0xF) << GSHIFT;
-	col    |= lut4to8[(val>>4)&0xF]    <<BSHIFT;
+	col |= Convert4To8((val >> 4) & 0xF) << BSHIFT;
 	_SetCol(col);
 }
 void _SetCol6666(u32 val)
 {
-	u32 col = lut6to8[(val>>18)&0x3F] << RSHIFT;
+	u32 col = Convert6To8((val >> 18) & 0x3F) << RSHIFT;
-	col    |= lut6to8[(val>>12)&0x3F] << GSHIFT;
+	col |= Convert6To8((val >> 12) & 0x3F) << GSHIFT;
-	col    |= lut6to8[(val>>6)&0x3F]  << BSHIFT;
+	col |= Convert6To8((val >> 6) & 0x3F) << BSHIFT;
-	col    |= lut6to8[(val>>0)&0x3F]  << ASHIFT;
+	col |= Convert6To8(val & 0x3F) << ASHIFT;
 	_SetCol(col);
 }
 void _SetCol565(u16 val)
 {
-	u32 col = lut5to8[(val>>11)&0x1f] << RSHIFT;
+	u32 col = Convert5To8((val >> 11) & 0x1F) << RSHIFT;
-	col     |= lut6to8[(val>>5 )&0x3f] << GSHIFT;
+	col |= Convert6To8((val >> 5) & 0x3F) << GSHIFT;
-	col     |= lut5to8[(val    )&0x1f] << BSHIFT;
+	col |= Convert5To8(val & 0x1F) << BSHIFT;
-	_SetCol(col | (0xFF<<ASHIFT));
+	_SetCol(col | (0xFF << ASHIFT));
 }
 //////////////////////////////////////////////////////////////////////////
 //////////////////////////////////////////////////////////////////////////
--- a/Source/Core/VideoCommon/VideoCommon.vcproj
+++ b/Source/Core/VideoCommon/VideoCommon.vcproj
@ -528,10 +528,6 @@
 				RelativePath=".\Src\IndexGenerator.h"
 				>
 			</File>
 			<File
 				RelativePath=".\Src\LookUpTables.cpp"
 				>
 			</File>
 			<File
 				RelativePath=".\Src\LookUpTables.h"
 				>
--- a/Source/Plugins/Plugin_VideoDX9/Src/main.cpp
+++ b/Source/Plugins/Plugin_VideoDX9/Src/main.cpp
@ -137,7 +137,6 @@ bool Init()
 			MessageBox(GetActiveWindow(), "Unable to initialize Direct3D. Please make sure that you have DirectX 9.0c correctly installed.", "Fatal Error", MB_OK);
 			return false;
 		}
 		InitLUTs();
 		InitXFBConvTables();
 	}
 	initCount++;
--- a/Source/Plugins/Plugin_VideoOGL/Src/main.cpp
+++ b/Source/Plugins/Plugin_VideoOGL/Src/main.cpp
@ -314,7 +314,6 @@ void Initialize(void *init)
    frameCount = 0;
    SVideoInitialize *_pVideoInitialize = (SVideoInitialize*)init;
    g_VideoInitialize = *(_pVideoInitialize); // Create a shortcut to _pVideoInitialize that can also update it
    InitLUTs();
 	InitXFBConvTables();
    g_Config.Load();