xenia/third_party/crunch/crnlib/crn_ryg_dxt.cpp

610 lines
15 KiB
C++

// File: crn_ryg_dxt.cpp
// RYG's real-time DXT compressor - Public domain.
#include "crn_core.h"
#include "crn_ryg_types.hpp"
#include "crn_ryg_dxt.hpp"
#ifdef _MSC_VER
#pragma warning (disable: 4244) // conversion from 'a' to 'b', possible loss of data
#endif
namespace ryg_dxt
{
// Lookup tables shared by the compressor. All of them are filled in by
// sInitDXT().
// Expand5/Expand6: 5-bit and 6-bit channel values widened to 8 bits.
sU8 Expand5[32];
sU8 Expand6[64];
// OMatch5/OMatch6: for every 8-bit value, the endpoint index pair
// ([0] = max, [1] = min) chosen by PrepareOptTable4, which scores the
// (2*max+min)/3 blend of the two expanded endpoints.
sU8 OMatch5[256][2];
sU8 OMatch6[256][2];
// OMatch5_3/OMatch6_3: same layout, chosen by PrepareOptTable3, which scores
// the midpoint of the two expanded endpoints.
sU8 OMatch5_3[256][2];
sU8 OMatch6_3[256][2];
// QuantRBTab/QuantGTab: 8-bit value quantized to 5 (resp. 6) bits and expanded
// back to 8 bits. Entry i describes the value clamp(i-8,0,255), so there are
// 8 extra entries on either side of the 0..255 range.
sU8 QuantRBTab[256+16];
sU8 QuantGTab[256+16];
// Scales a by b/255 using shifts instead of a division; the +128 bias
// provides the rounding.
static sInt Mul8Bit(sInt a,sInt b)
{
  const sInt biased = a*b + 128;
  const sInt folded = biased + (biased >> 8);
  return folded >> 8;
}
union Pixel
{
struct
{
sU8 b,g,r,a;
};
sU32 v;
void From16Bit(sU16 v)
{
sInt rv = (v & 0xf800) >> 11;
sInt gv = (v & 0x07e0) >> 5;
sInt bv = (v & 0x001f) >> 0;
a = 0;
r = Expand5[rv];
g = Expand6[gv];
b = Expand5[bv];
}
sU16 As16Bit() const
{
return (Mul8Bit(r,31) << 11) + (Mul8Bit(g,63) << 5) + Mul8Bit(b,31);
}
void LerpRGB(const Pixel &p1,const Pixel &p2,sInt f)
{
r = p1.r + Mul8Bit(p2.r - p1.r,f);
g = p1.g + Mul8Bit(p2.g - p1.g,f);
b = p1.b + Mul8Bit(p2.b - p1.b,f);
}
};
/****************************************************************************/
// Builds the single-color endpoint table for the two-thirds/one-third blend.
// For every 8-bit target value, all (min,max) index pairs below size are
// scored by how far (2*max+min)/3 of the expanded endpoints lies from the
// target, plus a small penalty that grows with the endpoint spread.
//   Table  - receives 256 pairs: Table[2*i] = max index, Table[2*i+1] = min index
//   expand - maps an endpoint index to its 8-bit value
//   size   - number of entries in expand
static void PrepareOptTable4(sU8 *Table,const sU8 *expand,sInt size)
{
  for(sInt target=0;target<256;target++)
  {
    sU8 *entry = Table + target*2;
    sInt lowestErr = 256;
    for(sInt lo=0;lo<size;lo++)
    {
      for(sInt hi=0;hi<size;hi++)
      {
        const sInt loVal = expand[lo];
        const sInt hiVal = expand[hi];
        // (a variant based on Mul8Bit(loVal-hiVal,0x55) was left disabled
        // by the original author in favour of the exact division)
        sInt err = sAbs(((hiVal*2+loVal)/3) - target);
        err += ((sAbs(hiVal-loVal)*8)>>8); // approx. .03f
        if(err >= lowestErr)
          continue;
        entry[0] = hi;
        entry[1] = lo;
        lowestErr = err;
      }
    }
  }
}
// Builds the single-color endpoint table for the midpoint blend.
// For every 8-bit target value, all (min,max) index pairs below size are
// scored by how far the average of the two expanded endpoints lies from the
// target, plus a small penalty that grows with the endpoint spread.
//   Table  - receives 256 pairs: Table[2*i] = max index, Table[2*i+1] = min index
//   expand - maps an endpoint index to its 8-bit value
//   size   - number of entries in expand
static void PrepareOptTable3(sU8 *Table,const sU8 *expand,sInt size)
{
  for(sInt target=0;target<256;target++)
  {
    sU8 *entry = Table + target*2;
    sInt lowestErr = 256;
    for(sInt lo=0;lo<size;lo++)
    {
      for(sInt hi=0;hi<size;hi++)
      {
        const sInt loVal = expand[lo];
        const sInt hiVal = expand[hi];
        sInt err = sAbs(((loVal + hiVal) >> 1) - target);
        err += ((sAbs(hiVal-loVal)*8)>>8); // approx. .03f
        if(err >= lowestErr)
          continue;
        entry[0] = hi;
        entry[1] = lo;
        lowestErr = err;
      }
    }
  }
}
// Expands two packed 5:6:5 endpoints into a four-entry palette:
// color[0] = c0, color[1] = c1, color[2] and color[3] are blends of the two
// at fractions 0x55/255 and 0xaa/255 of the way from c0 to c1.
static inline void EvalColors(Pixel *color,sU16 c0,sU16 c1)
{
  Pixel &first = color[0];
  Pixel &second = color[1];
  first.From16Bit(c0);
  second.From16Bit(c1);
  color[2].LerpRGB(first,second,0x55);
  color[3].LerpRGB(first,second,0xaa);
}
// Block dithering function. Dithers a block of 16 pixels to values that are
// representable in 565 RGB, using Floyd-Steinberg error diffusion (weights
// 7, 3, 5 and 1 sixteenths).
// Each of the first three bytes of a pixel (b, g, r in the Pixel layout) is
// handled independently; the fourth byte of every dest pixel is not written.
//   dest  - receives the 16 dithered pixels
//   block - the 16 source pixels, laid out as four rows of four
static void DitherBlock(Pixel *dest,const Pixel *block)
{
// err holds two rows of four error terms: ep1 collects the errors of the row
// being processed, ep2 holds the errors of the row above it.
sInt err[8],*ep1 = err,*ep2 = err+4;
// process channels separately
for(sInt ch=0;ch<3;ch++)
{
// NOTE(review): the cast drops const from block; bp is only read from below.
sU8 *bp = (sU8 *) block;
sU8 *dp = (sU8 *) dest;
// byte 1 (green) goes through the 6-bit table, the other two through the
// 5-bit table. The +8 offset lets the diffused error move the index a little
// outside 0..255 without leaving the table.
sU8 *quant = (ch == 1) ? QuantGTab+8 : QuantRBTab+8;
bp += ch;
dp += ch;
// every channel starts with no accumulated error
sSetMem(err,0,sizeof(err));
for(sInt y=0;y<4;y++)
{
// Byte offsets 0/4/8/12 step through the four pixels of the row. Each pixel
// is quantized after adding the weighted errors of its left neighbour (7)
// and of the row above (3, 5, 1), then its own error is recorded in ep1.
// pixel 0
dp[ 0] = quant[bp[ 0] + ((3*ep2[1] + 5*ep2[0]) >> 4)];
ep1[0] = bp[ 0] - dp[ 0];
// pixel 1
dp[ 4] = quant[bp[ 4] + ((7*ep1[0] + 3*ep2[2] + 5*ep2[1] + ep2[0]) >> 4)];
ep1[1] = bp[ 4] - dp[ 4];
// pixel 2
dp[ 8] = quant[bp[ 8] + ((7*ep1[1] + 3*ep2[3] + 5*ep2[2] + ep2[1]) >> 4)];
ep1[2] = bp[ 8] - dp[ 8];
// pixel 3
dp[12] = quant[bp[12] + ((7*ep1[2] + 5*ep2[3] + ep2[2]) >> 4)];
ep1[3] = bp[12] - dp[12];
// advance to next line: the row just written becomes the "row above"
sSwap(ep1,ep2);
bp += 16;
dp += 16;
}
}
}
// The color matching function.
// Chooses one of the four palette entries for each of the 16 pixels in block
// and returns the choices packed two bits per pixel, pixel 0 in the lowest
// two bits.
//   block  - the 16 pixels to match
//   color  - the four palette entries (as produced by EvalColors)
//   dither - when set, the matching error is diffused to neighbouring pixels
// Pixels and palette entries are compared by their projection onto the
// direction color[0]-color[1], not by full RGB distance.
static sU32 MatchColorsBlock(const Pixel *block,const Pixel *color,sBool dither)
{
sU32 mask = 0;
// direction from color[1] to color[0]
sInt dirr = color[0].r - color[1].r;
sInt dirg = color[0].g - color[1].g;
sInt dirb = color[0].b - color[1].b;
// projection of every pixel onto that direction
sInt dots[16];
for(sInt i=0;i<16;i++)
dots[i] = block[i].r*dirr + block[i].g*dirg + block[i].b*dirb;
// projection of every palette entry onto that direction
sInt stops[4];
for(sInt i=0;i<4;i++)
stops[i] = color[i].r*dirr + color[i].g*dirg + color[i].b*dirb;
// decision thresholds: midpoints between the stops of entries 1 and 3,
// 3 and 2, and 2 and 0
sInt c0Point = (stops[1] + stops[3]) >> 1;
sInt halfPoint = (stops[3] + stops[2]) >> 1;
sInt c3Point = (stops[2] + stops[0]) >> 1;
if(!dither)
{
// the version without dithering is straightforward
// (pixels are visited last to first so that pixel 0 ends up in the low bits)
for(sInt i=15;i>=0;i--)
{
mask <<= 2;
sInt dot = dots[i];
if(dot < halfPoint)
mask |= (dot < c0Point) ? 1 : 3;
else
mask |= (dot < c3Point) ? 2 : 0;
}
}
else
{
// with floyd-steinberg dithering (see above)
// ep1 collects the errors of the current row, ep2 holds the previous row's
sInt err[8],*ep1 = err,*ep2 = err+4;
sInt *dp = dots;
// thresholds are scaled by 16 because the dots below are scaled by 16 so
// that the error weights (sixteenths) can be added without a division
c0Point <<= 4;
halfPoint <<= 4;
c3Point <<= 4;
for(sInt i=0;i<8;i++)
err[i] = 0;
for(sInt y=0;y<4;y++)
{
sInt dot,lmask,step;
// For each pixel: add the weighted errors of the left neighbour (7) and of
// the row above (3, 5, 1) to the scaled dot, pick the palette entry, and
// record the unscaled difference to the chosen stop as this pixel's error.
// pixel 0
dot = (dp[0] << 4) + (3*ep2[1] + 5*ep2[0]);
if(dot < halfPoint)
step = (dot < c0Point) ? 1 : 3;
else
step = (dot < c3Point) ? 2 : 0;
ep1[0] = dp[0] - stops[step];
lmask = step;
// pixel 1
dot = (dp[1] << 4) + (7*ep1[0] + 3*ep2[2] + 5*ep2[1] + ep2[0]);
if(dot < halfPoint)
step = (dot < c0Point) ? 1 : 3;
else
step = (dot < c3Point) ? 2 : 0;
ep1[1] = dp[1] - stops[step];
lmask |= step<<2;
// pixel 2
dot = (dp[2] << 4) + (7*ep1[1] + 3*ep2[3] + 5*ep2[2] + ep2[1]);
if(dot < halfPoint)
step = (dot < c0Point) ? 1 : 3;
else
step = (dot < c3Point) ? 2 : 0;
ep1[2] = dp[2] - stops[step];
lmask |= step<<4;
// pixel 3
dot = (dp[3] << 4) + (7*ep1[2] + 5*ep2[3] + ep2[2]);
if(dot < halfPoint)
step = (dot < c0Point) ? 1 : 3;
else
step = (dot < c3Point) ? 2 : 0;
ep1[3] = dp[3] - stops[step];
lmask |= step<<6;
// advance to next line
sSwap(ep1,ep2);
dp += 4;
// each row contributes one byte (four 2-bit indices) to the result
mask |= lmask << (y*8);
}
}
return mask;
}
// The color optimization function. (Clever code, part 1)
static void OptimizeColorsBlock(const Pixel *block,sU16 &max16,sU16 &min16)
{
static const sInt nIterPower = 4;
// determine color distribution
sInt mu[3],min[3],max[3];
for(sInt ch=0;ch<3;ch++)
{
const sU8 *bp = ((const sU8 *) block) + ch;
sInt muv,minv,maxv;
muv = minv = maxv = bp[0];
for(sInt i=4;i<64;i+=4)
{
muv += bp[i];
minv = sMin<sInt>(minv,bp[i]);
maxv = sMax<sInt>(maxv,bp[i]);
}
mu[ch] = (muv + 8) >> 4;
min[ch] = minv;
max[ch] = maxv;
}
// determine covariance matrix
sInt cov[6];
for(sInt i=0;i<6;i++)
cov[i] = 0;
for(sInt i=0;i<16;i++)
{
sInt r = block[i].r - mu[2];
sInt g = block[i].g - mu[1];
sInt b = block[i].b - mu[0];
cov[0] += r*r;
cov[1] += r*g;
cov[2] += r*b;
cov[3] += g*g;
cov[4] += g*b;
cov[5] += b*b;
}
// convert covariance matrix to float, find principal axis via power iter
sF32 covf[6],vfr,vfg,vfb;
for(sInt i=0;i<6;i++)
covf[i] = cov[i] / 255.0f;
vfr = max[2] - min[2];
vfg = max[1] - min[1];
vfb = max[0] - min[0];
for(sInt iter=0;iter<nIterPower;iter++)
{
sF32 r = vfr*covf[0] + vfg*covf[1] + vfb*covf[2];
sF32 g = vfr*covf[1] + vfg*covf[3] + vfb*covf[4];
sF32 b = vfr*covf[2] + vfg*covf[4] + vfb*covf[5];
vfr = r;
vfg = g;
vfb = b;
}
sF32 magn = sMax(sMax(sFAbs(vfr),sFAbs(vfg)),sFAbs(vfb));
sInt v_r,v_g,v_b;
if(magn < 4.0f) // too small, default to luminance
{
v_r = 148;
v_g = 300;
v_b = 58;
}
else
{
magn = 512.0f / magn;
v_r = vfr * magn;
v_g = vfg * magn;
v_b = vfb * magn;
}
// Pick colors at extreme points
sInt mind = 0x7fffffff,maxd = -0x7fffffff;
Pixel minp,maxp;
for(sInt i=0;i<16;i++)
{
sInt dot = block[i].r*v_r + block[i].g*v_g + block[i].b*v_b;
if(dot < mind)
{
mind = dot;
minp = block[i];
}
if(dot > maxd)
{
maxd = dot;
maxp = block[i];
}
}
// Reduce to 16 bit colors
max16 = maxp.As16Bit();
min16 = minp.As16Bit();
}
// The refinement function. (Clever code, part 2)
// Given the palette index already chosen for every pixel, recomputes the two
// endpoints that fit the block best in the least-squares sense (normal
// equations solved with Cramer's rule).
//   block - the 16 pixels
//   max16 - in: current first endpoint, out: refined endpoint (5:6:5)
//   min16 - in: current second endpoint, out: refined endpoint (5:6:5)
//   mask  - two palette-index bits per pixel, pixel 0 in the lowest bits
// Returns sFALSE without touching the endpoints when the system cannot be
// solved; otherwise reports whether either endpoint changed.
static sBool RefineBlock(const Pixel *block,sU16 &max16,sU16 &min16,sU32 mask)
{
  // weight (in thirds) of the first endpoint for each palette index
  static const sInt weightTab[4] = { 3,0,2,1 };
  // w1*w1, w2*w2 and w1*w2 for each palette index (w2 = 3-w1), packed into
  // one integer so a single add per pixel accumulates all three sums
  static const sInt packedProds[4] = { 0x090000,0x000900,0x040102,0x010402 };

  sInt packedSums = 0;
  sInt At1_r = 0,At1_g = 0,At1_b = 0;
  sInt sumR = 0,sumG = 0,sumB = 0;
  for(sInt px=0;px<16;px++)
  {
    const sInt step = (mask >> (px*2)) & 3;
    const sInt w1 = weightTab[step];
    const sInt r = block[px].r;
    const sInt g = block[px].g;
    const sInt b = block[px].b;
    packedSums += packedProds[step];
    At1_r += w1*r;
    At1_g += w1*g;
    At1_b += w1*b;
    sumR += r;
    sumG += g;
    sumB += b;
  }
  // the weights of the second endpoint are 3-w1, so its sums follow directly
  const sInt At2_r = 3*sumR - At1_r;
  const sInt At2_g = 3*sumG - At1_g;
  const sInt At2_b = 3*sumB - At1_b;

  // extract solutions and decide solvability
  const sInt xx = packedSums >> 16;
  const sInt yy = (packedSums >> 8) & 0xff;
  const sInt xy = (packedSums >> 0) & 0xff;
  const sInt det = xx*yy - xy*xy;
  if(!yy || !xx || det == 0)
    return sFALSE;

  // scale factors that map the solution straight to 5-bit / 6-bit fields
  const sF32 frb = 3.0f * 31.0f / 255.0f / det;
  const sF32 fg = frb * 63.0f / 31.0f;
  const sU16 prevMin = min16;
  const sU16 prevMax = max16;

  // solve.
  const sInt maxR = sClamp<sInt>((At1_r*yy - At2_r*xy)*frb+0.5f,0,31);
  const sInt maxG = sClamp<sInt>((At1_g*yy - At2_g*xy)*fg +0.5f,0,63);
  const sInt maxB = sClamp<sInt>((At1_b*yy - At2_b*xy)*frb+0.5f,0,31);
  max16 = (maxR << 11) | (maxG << 5) | maxB;

  const sInt minR = sClamp<sInt>((At2_r*xx - At1_r*xy)*frb+0.5f,0,31);
  const sInt minG = sClamp<sInt>((At2_g*xx - At1_g*xy)*fg +0.5f,0,63);
  const sInt minB = sClamp<sInt>((At2_b*xx - At1_b*xy)*frb+0.5f,0,31);
  min16 = (minR << 11) | (minG << 5) | minB;

  return prevMin != min16 || prevMax != max16;
}
// Color block compression.
// Encodes 16 pixels into an 8-byte color block: two 16-bit 5:6:5 endpoints
// followed by a 32-bit word of 2-bit palette indices.
//   dest    - receives the 8 bytes (written through sU16/sU32 pointers)
//   src     - the 16 source pixels, one sU32 each
//   quality - non-zero enables the dithered code path
static void CompressColorBlock(sU8 *dest,const sU32 *src,sInt quality)
{
  const Pixel *block = (const Pixel *) src;
  Pixel dblock[16],color[4];
  sU16 min16,max16;
  sU32 mask;

  // the block is constant when every packed pixel (alpha byte included)
  // equals the first one
  bool isConstant = true;
  for(sInt i=1;i<16 && isConstant;i++)
    isConstant = (block[i].v == block[0].v);

  if(isConstant)
  {
    // constant color: look up the precomputed endpoint pair per channel and
    // point every pixel at palette entry 2
    const sInt r = block[0].r;
    const sInt g = block[0].g;
    const sInt b = block[0].b;
    mask = 0xaaaaaaaa;
    max16 = (OMatch5[r][0]<<11) | (OMatch6[g][0]<<5) | OMatch5[b][0];
    min16 = (OMatch5[r][1]<<11) | (OMatch6[g][1]<<5) | OMatch5[b][1];
  }
  else
  {
    // first step: compute dithered version for PCA if desired
    const Pixel *fitInput = quality ? dblock : block;
    if(quality)
      DitherBlock(dblock,block);

    // second step: pca+map along principal axis
    OptimizeColorsBlock(fitInput,max16,min16);
    mask = 0;
    if(max16 != min16)
    {
      EvalColors(color,max16,min16);
      mask = MatchColorsBlock(block,color,quality != 0);
    }

    // third step: refine, and re-match if the endpoints moved
    if(RefineBlock(fitInput,max16,min16,mask))
    {
      mask = 0;
      if(max16 != min16)
      {
        EvalColors(color,max16,min16);
        mask = MatchColorsBlock(block,color,quality != 0);
      }
    }
  }

  // write the color block; the larger endpoint is stored first, and swapping
  // the endpoints means flipping the low bit of every index
  if(max16 < min16)
  {
    sSwap(max16,min16);
    mask ^= 0x55555555;
  }
  sU16 *endpoints = (sU16 *) dest;
  endpoints[0] = max16;
  endpoints[1] = min16;
  ((sU32 *) dest)[1] = mask;
}
// Alpha block compression (this is easy for a change)
// Encodes the alpha channel of 16 pixels into 8 bytes: the largest alpha,
// the smallest alpha, then sixteen 3-bit indices packed into 6 bytes with
// pixel 0 in the lowest bits.
//   dest    - receives the 8 bytes
//   src     - the 16 source pixels, one sU32 each
//   quality - unused
static void CompressAlphaBlock(sU8 *dest,const sU32 *src,sInt quality)
{
  (void) quality; // intentionally unused; the cast avoids "statement has no effect" warnings
  const Pixel *block = (const Pixel *) src;

  // find min/max alpha
  sInt min,max;
  min = max = block[0].a;
  for(sInt i=1;i<16;i++)
  {
    min = sMin<sInt>(min,block[i].a);
    max = sMax<sInt>(max,block[i].a);
  }

  // encode them
  *dest++ = max;
  *dest++ = min;

  // determine bias and emit color indices
  // With this bias, a = 7*(alpha-min) + dist/2, i.e. the position inside the
  // range in sevenths, pre-rounded by half a step.
  sInt dist = max-min;
  sInt bias = min*7 - (dist >> 1);
  sInt dist4 = dist*4;
  sInt dist2 = dist*2;
  sInt bits = 0,mask=0;
  for(sInt i=0;i<16;i++)
  {
    sInt a = block[i].a*7 - bias;
    sInt ind,t;

    // select index (hooray for bit magic)
    // Each line tests whether a exceeds the threshold: t becomes all ones if
    // so (this relies on >> 31 replicating the sign bit of a 32-bit sInt),
    // the matching bit of ind is set and the threshold is subtracted from a.
    // The result counts whole steps of dist up from min, 0..7.
    t = (dist4 - a) >> 31; ind = t & 4; a -= dist4 & t;
    t = (dist2 - a) >> 31; ind += t & 2; a -= dist2 & t;
    t = (dist - a) >> 31; ind += t & 1;

    // remap to the stored order: step 7 (max) -> 0, step 0 (min) -> 1,
    // steps 6..1 -> 2..7
    ind = -ind & 7;
    ind ^= (2 > ind);

    // write index: accumulate 3 bits at a time, flush a byte whenever one is full
    mask |= ind << bits;
    if((bits += 3) >= 8)
    {
      *dest++ = mask;
      mask >>= 8;
      bits -= 8;
    }
  }
}
/****************************************************************************/
void sInitDXT()
{
for(sInt i=0;i<32;i++)
Expand5[i] = (i<<3)|(i>>2);
for(sInt i=0;i<64;i++)
Expand6[i] = (i<<2)|(i>>4);
for(sInt i=0;i<256+16;i++)
{
sInt v = sClamp(i-8,0,255);
QuantRBTab[i] = Expand5[Mul8Bit(v,31)];
QuantGTab[i] = Expand6[Mul8Bit(v,63)];
}
PrepareOptTable4(&OMatch5[0][0],Expand5,32);
PrepareOptTable4(&OMatch6[0][0],Expand6,64);
PrepareOptTable3(&OMatch5_3[0][0],Expand5,32);
PrepareOptTable3(&OMatch6_3[0][0],Expand6,64);
}
// Compresses one block of 16 pixels.
//   dest    - receives 8 bytes (color only) or 16 bytes (alpha block first,
//             then the color block)
//   src     - the 16 source pixels, one sU32 each
//   alpha   - when set, an alpha block is emitted ahead of the color block
//   quality - forwarded to the alpha and color block compressors
// Asserts that the tables have been initialised (see sInitDXT).
void sCompressDXTBlock(sU8 *dest,const sU32 *src,sBool alpha,sInt quality)
{
  CRNLIB_ASSERT(Expand5[1]);

  sU8 *colorDest = dest;
  if(alpha)
  {
    // the alpha block takes the first 8 bytes
    CompressAlphaBlock(dest,src,quality);
    colorDest = dest + 8;
  }

  CompressColorBlock(colorDest,src,quality);
}
// Compresses only the alpha channel of one block of 16 pixels.
//   dest    - receives the 8-byte alpha block
//   src     - the 16 source pixels, one sU32 each
//   quality - forwarded to CompressAlphaBlock, which does not use it
void sCompressDXT5ABlock(sU8 *dest,const sU32 *src,sInt quality)
{
// Expand5[1] is non-zero once sInitDXT() has filled the tables
CRNLIB_ASSERT(Expand5[1]);
CompressAlphaBlock(dest,src,quality);
}
} // namespace ryg_dxt