Don't use lookup tables. It's better to use CPU registers and reduce memory accesses.
git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@3909 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
parent
dcae5938c9
commit
8ab4814d73
|
@ -1,47 +0,0 @@
|
||||||
// Copyright (C) 2003 Dolphin Project.
|
|
||||||
|
|
||||||
// This program is free software: you can redistribute it and/or modify
|
|
||||||
// it under the terms of the GNU General Public License as published by
|
|
||||||
// the Free Software Foundation, version 2.0.
|
|
||||||
|
|
||||||
// This program is distributed in the hope that it will be useful,
|
|
||||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
||||||
// GNU General Public License 2.0 for more details.
|
|
||||||
|
|
||||||
// A copy of the GPL 2.0 should have been included with the program.
|
|
||||||
// If not, see http://www.gnu.org/licenses/
|
|
||||||
|
|
||||||
// Official SVN repository and contact information can be found at
|
|
||||||
// http://code.google.com/p/dolphin-emu/
|
|
||||||
|
|
||||||
#include "LookUpTables.h"
|
|
||||||
|
|
||||||
const int lut3to8[] = { 0x00,0x24,0x48,0x6D,0x91,0xB6,0xDA,0xFF};
|
|
||||||
const int lut4to8[] = { 0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77,
|
|
||||||
0x88,0x99,0xAA,0xBB,0xCC,0xDD,0xEE,0xFF};
|
|
||||||
const int lut5to8[] = { 0x00,0x08,0x10,0x18,0x20,0x29,0x31,0x39,
|
|
||||||
0x41,0x4A,0x52,0x5A,0x62,0x6A,0x73,0x7B,
|
|
||||||
0x83,0x8B,0x94,0x9C,0xA4,0xAC,0xB4,0xBD,
|
|
||||||
0xC5,0xCD,0xD5,0xDE,0xE6,0xEE,0xF6,0xFF};
|
|
||||||
int lut6to8[64];
|
|
||||||
float lutu8tosfloat[256];
|
|
||||||
float lutu8toufloat[256];
|
|
||||||
float luts8tosfloat[256];
|
|
||||||
float shiftLookup[32];
|
|
||||||
|
|
||||||
void InitLUTs()
|
|
||||||
{
|
|
||||||
for (int i = 0; i < 32; i++)
|
|
||||||
shiftLookup[i] = 1.0f / float(1 << i);
|
|
||||||
|
|
||||||
for (int i = 0; i < 64; i++)
|
|
||||||
lut6to8[i] = (i*255) / 63;
|
|
||||||
|
|
||||||
for (int i = 0; i < 256; i++)
|
|
||||||
{
|
|
||||||
lutu8tosfloat[i] = (float)(i - 128) / 127.0f;
|
|
||||||
lutu8toufloat[i] = (float)(i) / 255.0f;
|
|
||||||
luts8tosfloat[i] = ((float)(signed char)(char)i) / 127.0f;
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -20,15 +20,28 @@
|
||||||
|
|
||||||
#include "Common.h"
|
#include "Common.h"
|
||||||
|
|
||||||
extern const int lut3to8[8];
|
inline u8 Convert3To8(u8 v)
|
||||||
extern const int lut4to8[16];
|
{
|
||||||
extern const int lut5to8[32];
|
// Swizzle bits: 00000123 -> 12312312
|
||||||
extern int lut6to8[64];
|
return (v << 5) | (v << 2) | (v >> 1);
|
||||||
extern float lutu8tosfloat[256];
|
}
|
||||||
extern float lutu8toufloat[256];
|
|
||||||
extern float luts8tosfloat[256];
|
|
||||||
extern float shiftLookup[32];
|
|
||||||
|
|
||||||
void InitLUTs();
|
inline u8 Convert4To8(u8 v)
|
||||||
|
{
|
||||||
|
// Swizzle bits: 00001234 -> 12341234
|
||||||
|
return (v << 4) | v;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline u8 Convert5To8(u8 v)
|
||||||
|
{
|
||||||
|
// Swizzle bits: 00012345 -> 12345123
|
||||||
|
return (v << 3) | (v >> 2);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline u8 Convert6To8(u8 v)
|
||||||
|
{
|
||||||
|
// Swizzle bits: 00123456 -> 12345612
|
||||||
|
return (v << 2) | (v >> 4);
|
||||||
|
}
|
||||||
|
|
||||||
#endif // _LOOKUPTABLES_H
|
#endif // _LOOKUPTABLES_H
|
||||||
|
|
|
@ -150,16 +150,6 @@ int TexDecoder_GetPaletteSize(int format)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline u32 decode565(u16 val)
|
|
||||||
{
|
|
||||||
int r,g,b,a;
|
|
||||||
r=lut5to8[(val>>11) & 0x1f];
|
|
||||||
g=lut6to8[(val>>5 ) & 0x3f];
|
|
||||||
b=lut5to8[(val ) & 0x1f];
|
|
||||||
a=0xFF;
|
|
||||||
return (a << 24) | (r << 16) | (g << 8) | b;
|
|
||||||
}
|
|
||||||
|
|
||||||
inline u32 decodeIA8(u16 val)
|
inline u32 decodeIA8(u16 val)
|
||||||
{
|
{
|
||||||
int a = val >> 8;
|
int a = val >> 8;
|
||||||
|
@ -172,17 +162,17 @@ inline u32 decode5A3(u16 val)
|
||||||
int r,g,b,a;
|
int r,g,b,a;
|
||||||
if ((val & 0x8000))
|
if ((val & 0x8000))
|
||||||
{
|
{
|
||||||
r=lut5to8[(val >> 10) & 0x1f];
|
a = 0xFF;
|
||||||
g=lut5to8[(val >> 5 ) & 0x1f];
|
r = Convert5To8((val >> 10) & 0x1F);
|
||||||
b=lut5to8[(val ) & 0x1f];
|
g = Convert5To8((val >> 5) & 0x1F);
|
||||||
a=0xFF;
|
b = Convert5To8(val & 0x1F);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
a=lut3to8[(val >> 12) & 0x7];
|
a = Convert3To8((val >> 12) & 0x7);
|
||||||
r=lut4to8[(val >> 8 ) & 0xf];
|
r = Convert4To8((val >> 8) & 0xF);
|
||||||
g=lut4to8[(val >> 4 ) & 0xf];
|
g = Convert4To8((val >> 4) & 0xF);
|
||||||
b=lut4to8[(val ) & 0xf];
|
b = Convert4To8(val & 0xF);
|
||||||
}
|
}
|
||||||
return (a << 24) | (r << 16) | (g << 8) | b;
|
return (a << 24) | (r << 16) | (g << 8) | b;
|
||||||
}
|
}
|
||||||
|
@ -263,13 +253,13 @@ inline void decodebytesC14X2_To_Raw16(u16* dst, const u16* src, int tlutaddr)
|
||||||
//inline void decodebytesIA4(u16 *dst, const u8 *src, int numbytes)
|
//inline void decodebytesIA4(u16 *dst, const u8 *src, int numbytes)
|
||||||
inline void decodebytesIA4(u16 *dst, const u8 *src)
|
inline void decodebytesIA4(u16 *dst, const u8 *src)
|
||||||
{
|
{
|
||||||
for (int x = 0; x < 8; x++)
|
for (int x = 0; x < 8; x++)
|
||||||
{
|
{
|
||||||
const u8 val = src[x];
|
const u8 val = src[x];
|
||||||
const u8 a = lut4to8[val >> 4];
|
u8 a = Convert4To8(val >> 4);
|
||||||
const u8 l = lut4to8[val & 0xF];
|
u8 l = Convert4To8(val & 0xF);
|
||||||
dst[x] = (a << 8) | l;
|
dst[x] = (a << 8) | l;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//inline void decodebytesRGB5A3(u32 *dst, const u16 *src, int numpixels)
|
//inline void decodebytesRGB5A3(u32 *dst, const u16 *src, int numpixels)
|
||||||
|
@ -305,12 +295,12 @@ void decodeDXTBlock(u32 *dst, const DXTBlock *src, int pitch)
|
||||||
// S3TC Decoder (Note: GCN decodes differently from PC)
|
// S3TC Decoder (Note: GCN decodes differently from PC)
|
||||||
u16 c1 = Common::swap16(src->color1);
|
u16 c1 = Common::swap16(src->color1);
|
||||||
u16 c2 = Common::swap16(src->color2);
|
u16 c2 = Common::swap16(src->color2);
|
||||||
int blue1 = lut5to8[c1 & 0x1F];
|
u8 blue1 = Convert5To8(c1 & 0x1F);
|
||||||
int blue2 = lut5to8[c2 & 0x1F];
|
u8 blue2 = Convert5To8(c2 & 0x1F);
|
||||||
int green1 = lut6to8[(c1 >> 5) & 0x3F];
|
u8 green1 = Convert6To8((c1 >> 5) & 0x3F);
|
||||||
int green2 = lut6to8[(c2 >> 5) & 0x3F];
|
u8 green2 = Convert6To8((c2 >> 5) & 0x3F);
|
||||||
int red1 = lut5to8[(c1 >> 11) & 0x1F];
|
u8 red1 = Convert5To8((c1 >> 11) & 0x1F);
|
||||||
int red2 = lut5to8[(c2 >> 11) & 0x1F];
|
u8 red2 = Convert5To8((c2 >> 11) & 0x1F);
|
||||||
|
|
||||||
int colors[4];
|
int colors[4];
|
||||||
|
|
||||||
|
@ -402,8 +392,8 @@ PC_TexFormat TexDecoder_Decode_real(u8 *dst, const u8 *src, int width, int heigh
|
||||||
for (int ix = 0; ix < 4; ix++)
|
for (int ix = 0; ix < 4; ix++)
|
||||||
{
|
{
|
||||||
int val = src[ix];
|
int val = src[ix];
|
||||||
dst[(y + iy) * width + x + ix * 2] = lut4to8[val >> 4];
|
dst[(y + iy) * width + x + ix * 2] = Convert4To8(val >> 4);
|
||||||
dst[(y + iy) * width + x + ix * 2 + 1] = lut4to8[val & 15];
|
dst[(y + iy) * width + x + ix * 2 + 1] = Convert4To8(val & 0xF);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return PC_TEX_FMT_I4_AS_I8;
|
return PC_TEX_FMT_I4_AS_I8;
|
||||||
|
|
|
@ -619,10 +619,10 @@ void VertexLoader::RunVertices(int vtx_attr_group, int primitive, int count)
|
||||||
m_VtxAttr.texCoord[7].Frac = g_VtxAttr[vtx_attr_group].g2.Tex7Frac;
|
m_VtxAttr.texCoord[7].Frac = g_VtxAttr[vtx_attr_group].g2.Tex7Frac;
|
||||||
|
|
||||||
pVtxAttr = &m_VtxAttr;
|
pVtxAttr = &m_VtxAttr;
|
||||||
posScale = shiftLookup[m_VtxAttr.PosFrac];
|
posScale = 1.0f / float(1 << m_VtxAttr.PosFrac);
|
||||||
if (m_NativeFmt->m_components & VB_HAS_UVALL)
|
if (m_NativeFmt->m_components & VB_HAS_UVALL)
|
||||||
for (int i = 0; i < 8; i++)
|
for (int i = 0; i < 8; i++)
|
||||||
tcScale[i] = shiftLookup[m_VtxAttr.texCoord[i].Frac];
|
tcScale[i] = 1.0f / float(1 << m_VtxAttr.texCoord[i].Frac);
|
||||||
for (int i = 0; i < 2; i++)
|
for (int i = 0; i < 2; i++)
|
||||||
colElements[i] = m_VtxAttr.color[i].Elements;
|
colElements[i] = m_VtxAttr.color[i].Elements;
|
||||||
|
|
||||||
|
|
|
@ -42,28 +42,28 @@ inline void _SetCol(u32 val)
|
||||||
|
|
||||||
void _SetCol4444(u16 val)
|
void _SetCol4444(u16 val)
|
||||||
{
|
{
|
||||||
u32 col = lut4to8[(val>>0)&0xF]<<ASHIFT;
|
u32 col = Convert4To8(val & 0xF) << ASHIFT;
|
||||||
col |= lut4to8[(val>>12)&0xF] <<RSHIFT;
|
col |= Convert4To8((val >> 12) & 0xF) << RSHIFT;
|
||||||
col |= lut4to8[(val>>8)&0xF] <<GSHIFT;
|
col |= Convert4To8((val >> 8) & 0xF) << GSHIFT;
|
||||||
col |= lut4to8[(val>>4)&0xF] <<BSHIFT;
|
col |= Convert4To8((val >> 4) & 0xF) << BSHIFT;
|
||||||
_SetCol(col);
|
_SetCol(col);
|
||||||
}
|
}
|
||||||
|
|
||||||
void _SetCol6666(u32 val)
|
void _SetCol6666(u32 val)
|
||||||
{
|
{
|
||||||
u32 col = lut6to8[(val>>18)&0x3F] << RSHIFT;
|
u32 col = Convert6To8((val >> 18) & 0x3F) << RSHIFT;
|
||||||
col |= lut6to8[(val>>12)&0x3F] << GSHIFT;
|
col |= Convert6To8((val >> 12) & 0x3F) << GSHIFT;
|
||||||
col |= lut6to8[(val>>6)&0x3F] << BSHIFT;
|
col |= Convert6To8((val >> 6) & 0x3F) << BSHIFT;
|
||||||
col |= lut6to8[(val>>0)&0x3F] << ASHIFT;
|
col |= Convert6To8(val & 0x3F) << ASHIFT;
|
||||||
_SetCol(col);
|
_SetCol(col);
|
||||||
}
|
}
|
||||||
|
|
||||||
void _SetCol565(u16 val)
|
void _SetCol565(u16 val)
|
||||||
{
|
{
|
||||||
u32 col = lut5to8[(val>>11)&0x1f] << RSHIFT;
|
u32 col = Convert5To8((val >> 11) & 0x1F) << RSHIFT;
|
||||||
col |= lut6to8[(val>>5 )&0x3f] << GSHIFT;
|
col |= Convert6To8((val >> 5) & 0x3F) << GSHIFT;
|
||||||
col |= lut5to8[(val )&0x1f] << BSHIFT;
|
col |= Convert5To8(val & 0x1F) << BSHIFT;
|
||||||
_SetCol(col | (0xFF<<ASHIFT));
|
_SetCol(col | (0xFF << ASHIFT));
|
||||||
}
|
}
|
||||||
//////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////
|
||||||
//////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
|
|
@ -528,10 +528,6 @@
|
||||||
RelativePath=".\Src\IndexGenerator.h"
|
RelativePath=".\Src\IndexGenerator.h"
|
||||||
>
|
>
|
||||||
</File>
|
</File>
|
||||||
<File
|
|
||||||
RelativePath=".\Src\LookUpTables.cpp"
|
|
||||||
>
|
|
||||||
</File>
|
|
||||||
<File
|
<File
|
||||||
RelativePath=".\Src\LookUpTables.h"
|
RelativePath=".\Src\LookUpTables.h"
|
||||||
>
|
>
|
||||||
|
|
|
@ -137,7 +137,6 @@ bool Init()
|
||||||
MessageBox(GetActiveWindow(), "Unable to initialize Direct3D. Please make sure that you have DirectX 9.0c correctly installed.", "Fatal Error", MB_OK);
|
MessageBox(GetActiveWindow(), "Unable to initialize Direct3D. Please make sure that you have DirectX 9.0c correctly installed.", "Fatal Error", MB_OK);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
InitLUTs();
|
|
||||||
InitXFBConvTables();
|
InitXFBConvTables();
|
||||||
}
|
}
|
||||||
initCount++;
|
initCount++;
|
||||||
|
|
|
@ -314,7 +314,6 @@ void Initialize(void *init)
|
||||||
frameCount = 0;
|
frameCount = 0;
|
||||||
SVideoInitialize *_pVideoInitialize = (SVideoInitialize*)init;
|
SVideoInitialize *_pVideoInitialize = (SVideoInitialize*)init;
|
||||||
g_VideoInitialize = *(_pVideoInitialize); // Create a shortcut to _pVideoInitialize that can also update it
|
g_VideoInitialize = *(_pVideoInitialize); // Create a shortcut to _pVideoInitialize that can also update it
|
||||||
InitLUTs();
|
|
||||||
InitXFBConvTables();
|
InitXFBConvTables();
|
||||||
g_Config.Load();
|
g_Config.Load();
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue