From 8ab4814d73215271f0baaccf5cc51e7eba2b9c17 Mon Sep 17 00:00:00 2001 From: Nolan Check Date: Thu, 30 Jul 2009 20:29:52 +0000 Subject: [PATCH] Don't use lookup tables. It's better to use CPU registers and reduce memory accesses. git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@3909 8ced0084-cf51-0410-be5f-012b33b47a6e --- Source/Core/VideoCommon/Src/LookUpTables.cpp | 47 ---------------- Source/Core/VideoCommon/Src/LookUpTables.h | 31 +++++++---- .../Core/VideoCommon/Src/TextureDecoder.cpp | 54 ++++++++----------- Source/Core/VideoCommon/Src/VertexLoader.cpp | 4 +- .../VideoCommon/Src/VertexLoader_Color.cpp | 24 ++++----- Source/Core/VideoCommon/VideoCommon.vcproj | 4 -- Source/Plugins/Plugin_VideoDX9/Src/main.cpp | 1 - Source/Plugins/Plugin_VideoOGL/Src/main.cpp | 1 - 8 files changed, 58 insertions(+), 108 deletions(-) delete mode 100644 Source/Core/VideoCommon/Src/LookUpTables.cpp diff --git a/Source/Core/VideoCommon/Src/LookUpTables.cpp b/Source/Core/VideoCommon/Src/LookUpTables.cpp deleted file mode 100644 index f5223457db..0000000000 --- a/Source/Core/VideoCommon/Src/LookUpTables.cpp +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright (C) 2003 Dolphin Project. - -// This program is free software: you can redistribute it and/or modify -// it under the terms of the GNU General Public License as published by -// the Free Software Foundation, version 2.0. - -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License 2.0 for more details. - -// A copy of the GPL 2.0 should have been included with the program. 
-// If not, see http://www.gnu.org/licenses/ - -// Official SVN repository and contact information can be found at -// http://code.google.com/p/dolphin-emu/ - -#include "LookUpTables.h" - -const int lut3to8[] = { 0x00,0x24,0x48,0x6D,0x91,0xB6,0xDA,0xFF}; -const int lut4to8[] = { 0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77, - 0x88,0x99,0xAA,0xBB,0xCC,0xDD,0xEE,0xFF}; -const int lut5to8[] = { 0x00,0x08,0x10,0x18,0x20,0x29,0x31,0x39, - 0x41,0x4A,0x52,0x5A,0x62,0x6A,0x73,0x7B, - 0x83,0x8B,0x94,0x9C,0xA4,0xAC,0xB4,0xBD, - 0xC5,0xCD,0xD5,0xDE,0xE6,0xEE,0xF6,0xFF}; -int lut6to8[64]; -float lutu8tosfloat[256]; -float lutu8toufloat[256]; -float luts8tosfloat[256]; -float shiftLookup[32]; - -void InitLUTs() -{ - for (int i = 0; i < 32; i++) - shiftLookup[i] = 1.0f / float(1 << i); - - for (int i = 0; i < 64; i++) - lut6to8[i] = (i*255) / 63; - - for (int i = 0; i < 256; i++) - { - lutu8tosfloat[i] = (float)(i - 128) / 127.0f; - lutu8toufloat[i] = (float)(i) / 255.0f; - luts8tosfloat[i] = ((float)(signed char)(char)i) / 127.0f; - } -} diff --git a/Source/Core/VideoCommon/Src/LookUpTables.h b/Source/Core/VideoCommon/Src/LookUpTables.h index 304ef739bd..815846218b 100644 --- a/Source/Core/VideoCommon/Src/LookUpTables.h +++ b/Source/Core/VideoCommon/Src/LookUpTables.h @@ -20,15 +20,28 @@ #include "Common.h" -extern const int lut3to8[8]; -extern const int lut4to8[16]; -extern const int lut5to8[32]; -extern int lut6to8[64]; -extern float lutu8tosfloat[256]; -extern float lutu8toufloat[256]; -extern float luts8tosfloat[256]; -extern float shiftLookup[32]; +inline u8 Convert3To8(u8 v) +{ + // Swizzle bits: 00000123 -> 12312312 + return (v << 5) | (v << 2) | (v >> 1); +} -void InitLUTs(); +inline u8 Convert4To8(u8 v) +{ + // Swizzle bits: 00001234 -> 12341234 + return (v << 4) | v; +} + +inline u8 Convert5To8(u8 v) +{ + // Swizzle bits: 00012345 -> 12345123 + return (v << 3) | (v >> 2); +} + +inline u8 Convert6To8(u8 v) +{ + // Swizzle bits: 00123456 -> 12345612 + return (v << 2) | (v >> 
4); +} #endif // _LOOKUPTABLES_H diff --git a/Source/Core/VideoCommon/Src/TextureDecoder.cpp b/Source/Core/VideoCommon/Src/TextureDecoder.cpp index 4ff92967a7..c427efaddc 100644 --- a/Source/Core/VideoCommon/Src/TextureDecoder.cpp +++ b/Source/Core/VideoCommon/Src/TextureDecoder.cpp @@ -150,16 +150,6 @@ int TexDecoder_GetPaletteSize(int format) } } -inline u32 decode565(u16 val) -{ - int r,g,b,a; - r=lut5to8[(val>>11) & 0x1f]; - g=lut6to8[(val>>5 ) & 0x3f]; - b=lut5to8[(val ) & 0x1f]; - a=0xFF; - return (a << 24) | (r << 16) | (g << 8) | b; -} - inline u32 decodeIA8(u16 val) { int a = val >> 8; @@ -172,17 +162,17 @@ inline u32 decode5A3(u16 val) int r,g,b,a; if ((val & 0x8000)) { - r=lut5to8[(val >> 10) & 0x1f]; - g=lut5to8[(val >> 5 ) & 0x1f]; - b=lut5to8[(val ) & 0x1f]; - a=0xFF; + a = 0xFF; + r = Convert5To8((val >> 10) & 0x1F); + g = Convert5To8((val >> 5) & 0x1F); + b = Convert5To8(val & 0x1F); } else { - a=lut3to8[(val >> 12) & 0x7]; - r=lut4to8[(val >> 8 ) & 0xf]; - g=lut4to8[(val >> 4 ) & 0xf]; - b=lut4to8[(val ) & 0xf]; + a = Convert3To8((val >> 12) & 0x7); + r = Convert4To8((val >> 8) & 0xF); + g = Convert4To8((val >> 4) & 0xF); + b = Convert4To8(val & 0xF); } return (a << 24) | (r << 16) | (g << 8) | b; } @@ -263,13 +253,13 @@ inline void decodebytesC14X2_To_Raw16(u16* dst, const u16* src, int tlutaddr) //inline void decodebytesIA4(u16 *dst, const u8 *src, int numbytes) inline void decodebytesIA4(u16 *dst, const u8 *src) { - for (int x = 0; x < 8; x++) - { + for (int x = 0; x < 8; x++) + { const u8 val = src[x]; - const u8 a = lut4to8[val >> 4]; - const u8 l = lut4to8[val & 0xF]; - dst[x] = (a << 8) | l; - } + u8 a = Convert4To8(val >> 4); + u8 l = Convert4To8(val & 0xF); + dst[x] = (a << 8) | l; + } } //inline void decodebytesRGB5A3(u32 *dst, const u16 *src, int numpixels) @@ -305,12 +295,12 @@ void decodeDXTBlock(u32 *dst, const DXTBlock *src, int pitch) // S3TC Decoder (Note: GCN decodes differently from PC) u16 c1 = Common::swap16(src->color1); u16 
c2 = Common::swap16(src->color2); - int blue1 = lut5to8[c1 & 0x1F]; - int blue2 = lut5to8[c2 & 0x1F]; - int green1 = lut6to8[(c1 >> 5) & 0x3F]; - int green2 = lut6to8[(c2 >> 5) & 0x3F]; - int red1 = lut5to8[(c1 >> 11) & 0x1F]; - int red2 = lut5to8[(c2 >> 11) & 0x1F]; + u8 blue1 = Convert5To8(c1 & 0x1F); + u8 blue2 = Convert5To8(c2 & 0x1F); + u8 green1 = Convert6To8((c1 >> 5) & 0x3F); + u8 green2 = Convert6To8((c2 >> 5) & 0x3F); + u8 red1 = Convert5To8((c1 >> 11) & 0x1F); + u8 red2 = Convert5To8((c2 >> 11) & 0x1F); int colors[4]; @@ -402,8 +392,8 @@ PC_TexFormat TexDecoder_Decode_real(u8 *dst, const u8 *src, int width, int heigh for (int ix = 0; ix < 4; ix++) { int val = src[ix]; - dst[(y + iy) * width + x + ix * 2] = lut4to8[val >> 4]; - dst[(y + iy) * width + x + ix * 2 + 1] = lut4to8[val & 15]; + dst[(y + iy) * width + x + ix * 2] = Convert4To8(val >> 4); + dst[(y + iy) * width + x + ix * 2 + 1] = Convert4To8(val & 0xF); } } return PC_TEX_FMT_I4_AS_I8; diff --git a/Source/Core/VideoCommon/Src/VertexLoader.cpp b/Source/Core/VideoCommon/Src/VertexLoader.cpp index d02b889c43..9fe3dc8768 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader.cpp +++ b/Source/Core/VideoCommon/Src/VertexLoader.cpp @@ -619,10 +619,10 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count) m_VtxAttr.texCoord[7].Frac = g_VtxAttr[vtx_attr_group].g2.Tex7Frac; pVtxAttr = &m_VtxAttr; - posScale = shiftLookup[m_VtxAttr.PosFrac]; + posScale = 1.0f / float(1 << m_VtxAttr.PosFrac); if (m_NativeFmt->m_components & VB_HAS_UVALL) for (int i = 0; i < 8; i++) - tcScale[i] = shiftLookup[m_VtxAttr.texCoord[i].Frac]; + tcScale[i] = 1.0f / float(1 << m_VtxAttr.texCoord[i].Frac); for (int i = 0; i < 2; i++) colElements[i] = m_VtxAttr.color[i].Elements; diff --git a/Source/Core/VideoCommon/Src/VertexLoader_Color.cpp b/Source/Core/VideoCommon/Src/VertexLoader_Color.cpp index 2c52311517..44b4b87069 100644 --- a/Source/Core/VideoCommon/Src/VertexLoader_Color.cpp +++ 
b/Source/Core/VideoCommon/Src/VertexLoader_Color.cpp @@ -42,28 +42,28 @@ inline void _SetCol(u32 val) void _SetCol4444(u16 val) { - u32 col = lut4to8[(val>>0)&0xF]<<ASHIFT; - col |= lut4to8[(val>>12)&0xF] <<RSHIFT; - col |= lut4to8[(val>>8)&0xF] <<GSHIFT; - col |= lut4to8[(val>>4)&0xF] <<BSHIFT; + u32 col = Convert4To8(val & 0xF) << ASHIFT; + col |= Convert4To8((val >> 12) & 0xF) << RSHIFT; + col |= Convert4To8((val >> 8) & 0xF) << GSHIFT; + col |= Convert4To8((val >> 4) & 0xF) << BSHIFT; _SetCol(col); } void _SetCol6666(u32 val) { - u32 col = lut6to8[(val>>18)&0x3F] << RSHIFT; - col |= lut6to8[(val>>12)&0x3F] << GSHIFT; - col |= lut6to8[(val>>6)&0x3F] << BSHIFT; - col |= lut6to8[(val>>0)&0x3F] << ASHIFT; + u32 col = Convert6To8((val >> 18) & 0x3F) << RSHIFT; + col |= Convert6To8((val >> 12) & 0x3F) << GSHIFT; + col |= Convert6To8((val >> 6) & 0x3F) << BSHIFT; + col |= Convert6To8(val & 0x3F) << ASHIFT; _SetCol(col); } void _SetCol565(u16 val) { - u32 col = lut5to8[(val>>11)&0x1f] << RSHIFT; - col |= lut6to8[(val>>5 )&0x3f] << GSHIFT; - col |= lut5to8[(val )&0x1f] << BSHIFT; - _SetCol(col | (0xFF<<ASHIFT)); + u32 col = Convert5To8((val >> 11) & 0x1F) << RSHIFT; + col |= Convert6To8((val >> 5) & 0x3F) << GSHIFT; + col |= Convert5To8(val & 0x1F) << BSHIFT; + _SetCol(col | (0xFF << ASHIFT)); } ////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////// diff --git a/Source/Core/VideoCommon/VideoCommon.vcproj b/Source/Core/VideoCommon/VideoCommon.vcproj index 5c32f00e69..1335ab1ea8 100644 --- a/Source/Core/VideoCommon/VideoCommon.vcproj +++ b/Source/Core/VideoCommon/VideoCommon.vcproj @@ -528,10 +528,6 @@ RelativePath=".\Src\IndexGenerator.h" > </File> - <File - RelativePath=".\Src\LookUpTables.cpp" - > - </File> <File RelativePath=".\Src\LookUpTables.h" > diff --git a/Source/Plugins/Plugin_VideoDX9/Src/main.cpp b/Source/Plugins/Plugin_VideoDX9/Src/main.cpp index 786cb0e7ff..cd6aa07708 100644 --- a/Source/Plugins/Plugin_VideoDX9/Src/main.cpp +++ b/Source/Plugins/Plugin_VideoDX9/Src/main.cpp @@ -137,7 +137,6 @@ bool Init() MessageBox(GetActiveWindow(), "Unable to initialize Direct3D. 
Please make sure that you have DirectX 9.0c correctly installed.", "Fatal Error", MB_OK); return false; } - InitLUTs(); InitXFBConvTables(); } initCount++; diff --git a/Source/Plugins/Plugin_VideoOGL/Src/main.cpp b/Source/Plugins/Plugin_VideoOGL/Src/main.cpp index e8e48a93fb..596a260d76 100644 --- a/Source/Plugins/Plugin_VideoOGL/Src/main.cpp +++ b/Source/Plugins/Plugin_VideoOGL/Src/main.cpp @@ -314,7 +314,6 @@ void Initialize(void *init) frameCount = 0; SVideoInitialize *_pVideoInitialize = (SVideoInitialize*)init; g_VideoInitialize = *(_pVideoInitialize); // Create a shortcut to _pVideoInitialize that can also update it - InitLUTs(); InitXFBConvTables(); g_Config.Load();