Don't use lookup tables. It's better to use CPU registers and reduce memory accesses.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@3909 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
Nolan Check 2009-07-30 20:29:52 +00:00
parent dcae5938c9
commit 8ab4814d73
8 changed files with 58 additions and 108 deletions

View File

@ -1,47 +0,0 @@
// Copyright (C) 2003 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#include "LookUpTables.h"
// Constant tables that widen a narrow colour channel to 8 bits.
// Each table is indexed by the narrow value and holds the widened value.

// 3-bit -> 8-bit: entry i equals (i * 255) / 7 using integer division.
const int lut3to8[] = { 0x00,0x24,0x48,0x6D,0x91,0xB6,0xDA,0xFF};
// 4-bit -> 8-bit: entry i equals i * 0x11 (the nibble repeated in both halves).
const int lut4to8[] = { 0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77,
0x88,0x99,0xAA,0xBB,0xCC,0xDD,0xEE,0xFF};
// 5-bit -> 8-bit: entry i equals (i * 255) / 31 using integer division.
const int lut5to8[] = { 0x00,0x08,0x10,0x18,0x20,0x29,0x31,0x39,
0x41,0x4A,0x52,0x5A,0x62,0x6A,0x73,0x7B,
0x83,0x8B,0x94,0x9C,0xA4,0xAC,0xB4,0xBD,
0xC5,0xCD,0xD5,0xDE,0xE6,0xEE,0xF6,0xFF};
// Tables filled at runtime by InitLUTs(). As zero-initialised globals they
// hold all zeroes until InitLUTs() has been called.
int lut6to8[64];           // 6-bit -> 8-bit: (i * 255) / 63, integer division
float lutu8tosfloat[256];  // (i - 128) / 127
float lutu8toufloat[256];  // i / 255
float luts8tosfloat[256];  // i reinterpreted as a signed byte, divided by 127
float shiftLookup[32];     // shiftLookup[n] == 2^-n

// Populates every runtime lookup table declared above.
// Must be called once before any of those tables is read; calling it again
// simply rewrites the same values.
void InitLUTs()
{
    for (int i = 0; i < 32; i++)
    {
        // Shift an unsigned 1: with a signed int, 1 << 31 overflows and the
        // last entry would come out negative instead of +2^-31.
        shiftLookup[i] = 1.0f / static_cast<float>(1u << i);
    }

    for (int i = 0; i < 64; i++)
        lut6to8[i] = (i * 255) / 63;

    for (int i = 0; i < 256; i++)
    {
        // Note: entry 0 is -128/127, i.e. slightly below -1.0.
        lutu8tosfloat[i] = static_cast<float>(i - 128) / 127.0f;
        lutu8toufloat[i] = static_cast<float>(i) / 255.0f;
        // Indices 128..255 wrap to -128..-1 when viewed as a signed byte.
        luts8tosfloat[i] = static_cast<float>(static_cast<signed char>(i)) / 127.0f;
    }
}

View File

@ -20,15 +20,28 @@
#include "Common.h" #include "Common.h"
extern const int lut3to8[8]; inline u8 Convert3To8(u8 v)
extern const int lut4to8[16]; {
extern const int lut5to8[32]; // Swizzle bits: 00000123 -> 12312312
extern int lut6to8[64]; return (v << 5) | (v << 2) | (v >> 1);
extern float lutu8tosfloat[256]; }
extern float lutu8toufloat[256];
extern float luts8tosfloat[256];
extern float shiftLookup[32];
void InitLUTs(); inline u8 Convert4To8(u8 v)
{
// Swizzle bits: 00001234 -> 12341234
return (v << 4) | v;
}
inline u8 Convert5To8(u8 v)
{
// Swizzle bits: 00012345 -> 12345123
return (v << 3) | (v >> 2);
}
inline u8 Convert6To8(u8 v)
{
// Swizzle bits: 00123456 -> 12345612
return (v << 2) | (v >> 4);
}
#endif // _LOOKUPTABLES_H #endif // _LOOKUPTABLES_H

View File

@ -150,16 +150,6 @@ int TexDecoder_GetPaletteSize(int format)
} }
} }
inline u32 decode565(u16 val)
{
int r,g,b,a;
r=lut5to8[(val>>11) & 0x1f];
g=lut6to8[(val>>5 ) & 0x3f];
b=lut5to8[(val ) & 0x1f];
a=0xFF;
return (a << 24) | (r << 16) | (g << 8) | b;
}
inline u32 decodeIA8(u16 val) inline u32 decodeIA8(u16 val)
{ {
int a = val >> 8; int a = val >> 8;
@ -172,17 +162,17 @@ inline u32 decode5A3(u16 val)
int r,g,b,a; int r,g,b,a;
if ((val & 0x8000)) if ((val & 0x8000))
{ {
r=lut5to8[(val >> 10) & 0x1f]; a = 0xFF;
g=lut5to8[(val >> 5 ) & 0x1f]; r = Convert5To8((val >> 10) & 0x1F);
b=lut5to8[(val ) & 0x1f]; g = Convert5To8((val >> 5) & 0x1F);
a=0xFF; b = Convert5To8(val & 0x1F);
} }
else else
{ {
a=lut3to8[(val >> 12) & 0x7]; a = Convert3To8((val >> 12) & 0x7);
r=lut4to8[(val >> 8 ) & 0xf]; r = Convert4To8((val >> 8) & 0xF);
g=lut4to8[(val >> 4 ) & 0xf]; g = Convert4To8((val >> 4) & 0xF);
b=lut4to8[(val ) & 0xf]; b = Convert4To8(val & 0xF);
} }
return (a << 24) | (r << 16) | (g << 8) | b; return (a << 24) | (r << 16) | (g << 8) | b;
} }
@ -263,13 +253,13 @@ inline void decodebytesC14X2_To_Raw16(u16* dst, const u16* src, int tlutaddr)
//inline void decodebytesIA4(u16 *dst, const u8 *src, int numbytes) //inline void decodebytesIA4(u16 *dst, const u8 *src, int numbytes)
inline void decodebytesIA4(u16 *dst, const u8 *src) inline void decodebytesIA4(u16 *dst, const u8 *src)
{ {
for (int x = 0; x < 8; x++) for (int x = 0; x < 8; x++)
{ {
const u8 val = src[x]; const u8 val = src[x];
const u8 a = lut4to8[val >> 4]; u8 a = Convert4To8(val >> 4);
const u8 l = lut4to8[val & 0xF]; u8 l = Convert4To8(val & 0xF);
dst[x] = (a << 8) | l; dst[x] = (a << 8) | l;
} }
} }
//inline void decodebytesRGB5A3(u32 *dst, const u16 *src, int numpixels) //inline void decodebytesRGB5A3(u32 *dst, const u16 *src, int numpixels)
@ -305,12 +295,12 @@ void decodeDXTBlock(u32 *dst, const DXTBlock *src, int pitch)
// S3TC Decoder (Note: GCN decodes differently from PC) // S3TC Decoder (Note: GCN decodes differently from PC)
u16 c1 = Common::swap16(src->color1); u16 c1 = Common::swap16(src->color1);
u16 c2 = Common::swap16(src->color2); u16 c2 = Common::swap16(src->color2);
int blue1 = lut5to8[c1 & 0x1F]; u8 blue1 = Convert5To8(c1 & 0x1F);
int blue2 = lut5to8[c2 & 0x1F]; u8 blue2 = Convert5To8(c2 & 0x1F);
int green1 = lut6to8[(c1 >> 5) & 0x3F]; u8 green1 = Convert6To8((c1 >> 5) & 0x3F);
int green2 = lut6to8[(c2 >> 5) & 0x3F]; u8 green2 = Convert6To8((c2 >> 5) & 0x3F);
int red1 = lut5to8[(c1 >> 11) & 0x1F]; u8 red1 = Convert5To8((c1 >> 11) & 0x1F);
int red2 = lut5to8[(c2 >> 11) & 0x1F]; u8 red2 = Convert5To8((c2 >> 11) & 0x1F);
int colors[4]; int colors[4];
@ -402,8 +392,8 @@ PC_TexFormat TexDecoder_Decode_real(u8 *dst, const u8 *src, int width, int heigh
for (int ix = 0; ix < 4; ix++) for (int ix = 0; ix < 4; ix++)
{ {
int val = src[ix]; int val = src[ix];
dst[(y + iy) * width + x + ix * 2] = lut4to8[val >> 4]; dst[(y + iy) * width + x + ix * 2] = Convert4To8(val >> 4);
dst[(y + iy) * width + x + ix * 2 + 1] = lut4to8[val & 15]; dst[(y + iy) * width + x + ix * 2 + 1] = Convert4To8(val & 0xF);
} }
} }
return PC_TEX_FMT_I4_AS_I8; return PC_TEX_FMT_I4_AS_I8;

View File

@ -619,10 +619,10 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
m_VtxAttr.texCoord[7].Frac = g_VtxAttr[vtx_attr_group].g2.Tex7Frac; m_VtxAttr.texCoord[7].Frac = g_VtxAttr[vtx_attr_group].g2.Tex7Frac;
pVtxAttr = &m_VtxAttr; pVtxAttr = &m_VtxAttr;
posScale = shiftLookup[m_VtxAttr.PosFrac]; posScale = 1.0f / float(1 << m_VtxAttr.PosFrac);
if (m_NativeFmt->m_components & VB_HAS_UVALL) if (m_NativeFmt->m_components & VB_HAS_UVALL)
for (int i = 0; i < 8; i++) for (int i = 0; i < 8; i++)
tcScale[i] = shiftLookup[m_VtxAttr.texCoord[i].Frac]; tcScale[i] = 1.0f / float(1 << m_VtxAttr.texCoord[i].Frac);
for (int i = 0; i < 2; i++) for (int i = 0; i < 2; i++)
colElements[i] = m_VtxAttr.color[i].Elements; colElements[i] = m_VtxAttr.color[i].Elements;

View File

@ -42,28 +42,28 @@ inline void _SetCol(u32 val)
void _SetCol4444(u16 val) void _SetCol4444(u16 val)
{ {
u32 col = lut4to8[(val>>0)&0xF]<<ASHIFT; u32 col = Convert4To8(val & 0xF) << ASHIFT;
col |= lut4to8[(val>>12)&0xF] <<RSHIFT; col |= Convert4To8((val >> 12) & 0xF) << RSHIFT;
col |= lut4to8[(val>>8)&0xF] <<GSHIFT; col |= Convert4To8((val >> 8) & 0xF) << GSHIFT;
col |= lut4to8[(val>>4)&0xF] <<BSHIFT; col |= Convert4To8((val >> 4) & 0xF) << BSHIFT;
_SetCol(col); _SetCol(col);
} }
void _SetCol6666(u32 val) void _SetCol6666(u32 val)
{ {
u32 col = lut6to8[(val>>18)&0x3F] << RSHIFT; u32 col = Convert6To8((val >> 18) & 0x3F) << RSHIFT;
col |= lut6to8[(val>>12)&0x3F] << GSHIFT; col |= Convert6To8((val >> 12) & 0x3F) << GSHIFT;
col |= lut6to8[(val>>6)&0x3F] << BSHIFT; col |= Convert6To8((val >> 6) & 0x3F) << BSHIFT;
col |= lut6to8[(val>>0)&0x3F] << ASHIFT; col |= Convert6To8(val & 0x3F) << ASHIFT;
_SetCol(col); _SetCol(col);
} }
void _SetCol565(u16 val) void _SetCol565(u16 val)
{ {
u32 col = lut5to8[(val>>11)&0x1f] << RSHIFT; u32 col = Convert5To8((val >> 11) & 0x1F) << RSHIFT;
col |= lut6to8[(val>>5 )&0x3f] << GSHIFT; col |= Convert6To8((val >> 5) & 0x3F) << GSHIFT;
col |= lut5to8[(val )&0x1f] << BSHIFT; col |= Convert5To8(val & 0x1F) << BSHIFT;
_SetCol(col | (0xFF<<ASHIFT)); _SetCol(col | (0xFF << ASHIFT));
} }
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////

View File

@ -528,10 +528,6 @@
RelativePath=".\Src\IndexGenerator.h" RelativePath=".\Src\IndexGenerator.h"
> >
</File> </File>
<File
RelativePath=".\Src\LookUpTables.cpp"
>
</File>
<File <File
RelativePath=".\Src\LookUpTables.h" RelativePath=".\Src\LookUpTables.h"
> >

View File

@ -137,7 +137,6 @@ bool Init()
MessageBox(GetActiveWindow(), "Unable to initialize Direct3D. Please make sure that you have DirectX 9.0c correctly installed.", "Fatal Error", MB_OK); MessageBox(GetActiveWindow(), "Unable to initialize Direct3D. Please make sure that you have DirectX 9.0c correctly installed.", "Fatal Error", MB_OK);
return false; return false;
} }
InitLUTs();
InitXFBConvTables(); InitXFBConvTables();
} }
initCount++; initCount++;

View File

@ -314,7 +314,6 @@ void Initialize(void *init)
frameCount = 0; frameCount = 0;
SVideoInitialize *_pVideoInitialize = (SVideoInitialize*)init; SVideoInitialize *_pVideoInitialize = (SVideoInitialize*)init;
g_VideoInitialize = *(_pVideoInitialize); // Create a shortcut to _pVideoInitialize that can also update it g_VideoInitialize = *(_pVideoInitialize); // Create a shortcut to _pVideoInitialize that can also update it
InitLUTs();
InitXFBConvTables(); InitXFBConvTables();
g_Config.Load(); g_Config.Load();