917 lines
29 KiB
C++
917 lines
29 KiB
C++
|
// File: crn_dxt_fast.cpp
|
||
|
// See Copyright Notice and license at the end of inc/crnlib.h
|
||
|
// Parts of this module are derived from RYG's excellent public domain DXTx compressor.
|
||
|
#include "crn_core.h"
|
||
|
#include "crn_dxt_fast.h"
|
||
|
#include "crn_ryg_dxt.hpp"
|
||
|
|
||
|
namespace crnlib
|
||
|
{
|
||
|
namespace dxt_fast
|
||
|
{
|
||
|
// Fixed-point approximation of (a * b) / 255, rounded to nearest, computed
// with shifts and adds only (no division). Intended for 8-bit operands.
// Note: for a negative product the result depends on >> being an arithmetic
// shift on signed ints.
static inline int mul_8bit(int a, int b)
{
   int t = a * b + 128;
   return (t + (t >> 8)) >> 8;
}
|
||
|
|
||
|
// Unpacks a 16-bit 5:6:5 color `v` (red in bits 11-15, green in bits 5-10,
// blue in bits 0-4) into `c`, widening each field through the
// ryg_dxt::Expand5 / ryg_dxt::Expand6 lookup tables. Alpha is set to 0.
// Returns a reference to `c`.
static inline color_quad_u8& unpack_color(color_quad_u8& c, uint v)
{
   uint rv = (v & 0xf800) >> 11;
   uint gv = (v & 0x07e0) >> 5;
   uint bv = (v & 0x001f) >> 0;

   c.r = ryg_dxt::Expand5[rv];
   c.g = ryg_dxt::Expand6[gv];
   c.b = ryg_dxt::Expand5[bv];
   c.a = 0;

   return c;
}
|
||
|
|
||
|
// Packs the r/g/b channels of `c` into a 16-bit 5:6:5 value. Each channel is
// scaled by mul_8bit() to 5 bits (r, b) or 6 bits (g); alpha is ignored.
static inline uint pack_color(const color_quad_u8& c)
{
   return (mul_8bit(c.r, 31) << 11) + (mul_8bit(c.g, 63) << 5) + mul_8bit(c.b, 31);
}
|
||
|
|
||
|
// Interpolates r/g/b from p1 towards p2 by the 8-bit fraction f
// (0 -> p1, 255 -> p2) and stores the result in `result`.
// result.a is left untouched.
static inline void lerp_color(color_quad_u8& result, const color_quad_u8& p1, const color_quad_u8& p2, uint f)
{
   CRNLIB_ASSERT(f <= 255);

   result.r = static_cast<uint8>(p1.r + mul_8bit(p2.r - p1.r, f));
   result.g = static_cast<uint8>(p1.g + mul_8bit(p2.g - p1.g, f));
   result.b = static_cast<uint8>(p1.b + mul_8bit(p2.b - p1.b, f));
}
|
||
|
|
||
|
// Builds the 4-entry palette for a pair of packed 5:6:5 endpoints.
//   pColors[0] = unpacked c0
//   pColors[1] = unpacked c1
//   pColors[2] = (2*c0 + c1) / 3   (integer division, per channel)
//   pColors[3] = (2*c1 + c0) / 3
// Only r/g/b of entries 2 and 3 are written; their alpha is left as-is.
static inline void eval_colors(color_quad_u8* pColors, uint c0, uint c1)
{
   unpack_color(pColors[0], c0);
   unpack_color(pColors[1], c1);

#if 0
   // Alternative interpolation via mul_8bit (disabled).
   lerp_color(pColors[2], pColors[0], pColors[1], 0x55);
   lerp_color(pColors[3], pColors[0], pColors[1], 0xAA);
#else
   pColors[2].r = (pColors[0].r*2+pColors[1].r)/3;
   pColors[2].g = (pColors[0].g*2+pColors[1].g)/3;
   pColors[2].b = (pColors[0].b*2+pColors[1].b)/3;

   pColors[3].r = (pColors[1].r*2+pColors[0].r)/3;
   pColors[3].g = (pColors[1].g*2+pColors[0].g)/3;
   pColors[3].b = (pColors[1].b*2+pColors[0].b)/3;
#endif
}
|
||
|
|
||
|
// false if all selectors equal
|
||
|
static bool match_block_colors(uint n, const color_quad_u8* pBlock, const color_quad_u8* pColors, uint8* pSelectors)
|
||
|
{
|
||
|
int dirr = pColors[0].r - pColors[1].r;
|
||
|
int dirg = pColors[0].g - pColors[1].g;
|
||
|
int dirb = pColors[0].b - pColors[1].b;
|
||
|
|
||
|
int stops[4];
|
||
|
for(int i = 0; i < 4; i++)
|
||
|
stops[i] = pColors[i].r*dirr + pColors[i].g*dirg + pColors[i].b*dirb;
|
||
|
|
||
|
// 0 2 3 1
|
||
|
int c0Point = stops[1] + stops[3];
|
||
|
int halfPoint = stops[3] + stops[2];
|
||
|
int c3Point = stops[2] + stops[0];
|
||
|
|
||
|
//dirr *= 2;
|
||
|
//dirg *= 2;
|
||
|
//dirb *= 2;
|
||
|
c0Point >>= 1;
|
||
|
halfPoint >>= 1;
|
||
|
c3Point >>= 1;
|
||
|
|
||
|
bool status = false;
|
||
|
for (uint i = 0; i < n; i++)
|
||
|
{
|
||
|
int dot = pBlock[i].r*dirr + pBlock[i].g*dirg + pBlock[i].b*dirb;
|
||
|
|
||
|
uint8 s;
|
||
|
if (dot < halfPoint)
|
||
|
s = (dot < c0Point) ? 1 : 3;
|
||
|
else
|
||
|
s = (dot < c3Point) ? 2 : 0;
|
||
|
|
||
|
pSelectors[i] = s;
|
||
|
|
||
|
if (s != pSelectors[0])
|
||
|
status = true;
|
||
|
}
|
||
|
|
||
|
return status;
|
||
|
}
|
||
|
|
||
|
static bool optimize_block_colors(uint n, const color_quad_u8* block, uint& max16, uint& min16, uint ave_color[3], float axis[3])
|
||
|
{
|
||
|
int min[3], max[3];
|
||
|
|
||
|
for(uint ch = 0; ch < 3; ch++)
|
||
|
{
|
||
|
const uint8 *bp = ((const uint8 *) block) + ch;
|
||
|
int minv, maxv;
|
||
|
|
||
|
int64 muv = bp[0];
|
||
|
minv = maxv = bp[0];
|
||
|
|
||
|
const uint l = n << 2;
|
||
|
for (uint i = 4; i < l; i += 4)
|
||
|
{
|
||
|
muv += bp[i];
|
||
|
minv = math::minimum<int>(minv, bp[i]);
|
||
|
maxv = math::maximum<int>(maxv, bp[i]);
|
||
|
}
|
||
|
|
||
|
ave_color[ch] = static_cast<int>((muv + (n / 2)) / n);
|
||
|
min[ch] = minv;
|
||
|
max[ch] = maxv;
|
||
|
}
|
||
|
|
||
|
if ((min[0] == max[0]) && (min[1] == max[1]) && (min[2] == max[2]))
|
||
|
return false;
|
||
|
|
||
|
// determine covariance matrix
|
||
|
double cov[6];
|
||
|
for(int i=0;i<6;i++)
|
||
|
cov[i] = 0;
|
||
|
|
||
|
for(uint i=0;i<n;i++)
|
||
|
{
|
||
|
double r = (int)block[i].r - (int)ave_color[0];
|
||
|
double g = (int)block[i].g - (int)ave_color[1];
|
||
|
double b = (int)block[i].b - (int)ave_color[2];
|
||
|
|
||
|
cov[0] += r*r;
|
||
|
cov[1] += r*g;
|
||
|
cov[2] += r*b;
|
||
|
cov[3] += g*g;
|
||
|
cov[4] += g*b;
|
||
|
cov[5] += b*b;
|
||
|
}
|
||
|
|
||
|
double covf[6],vfr,vfg,vfb;
|
||
|
for(int i=0;i<6;i++)
|
||
|
covf[i] = cov[i] * (1.0f/255.0f);
|
||
|
|
||
|
vfr = max[0] - min[0];
|
||
|
vfg = max[1] - min[1];
|
||
|
vfb = max[2] - min[2];
|
||
|
|
||
|
static const uint nIterPower = 4;
|
||
|
for(uint iter = 0; iter < nIterPower; iter++)
|
||
|
{
|
||
|
double r = vfr*covf[0] + vfg*covf[1] + vfb*covf[2];
|
||
|
double g = vfr*covf[1] + vfg*covf[3] + vfb*covf[4];
|
||
|
double b = vfr*covf[2] + vfg*covf[4] + vfb*covf[5];
|
||
|
|
||
|
vfr = r;
|
||
|
vfg = g;
|
||
|
vfb = b;
|
||
|
}
|
||
|
|
||
|
double magn = math::maximum(math::maximum(fabs(vfr),fabs(vfg)),fabs(vfb));
|
||
|
int v_r, v_g, v_b;
|
||
|
|
||
|
if (magn < 4.0f) // too small, default to luminance
|
||
|
{
|
||
|
v_r = 148;
|
||
|
v_g = 300;
|
||
|
v_b = 58;
|
||
|
|
||
|
axis[0] = (float)v_r;
|
||
|
axis[1] = (float)v_g;
|
||
|
axis[2] = (float)v_b;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
magn = 512.0f / magn;
|
||
|
vfr *= magn;
|
||
|
vfg *= magn;
|
||
|
vfb *= magn;
|
||
|
v_r = static_cast<int>(vfr);
|
||
|
v_g = static_cast<int>(vfg);
|
||
|
v_b = static_cast<int>(vfb);
|
||
|
|
||
|
axis[0] = (float)vfr;
|
||
|
axis[1] = (float)vfg;
|
||
|
axis[2] = (float)vfb;
|
||
|
}
|
||
|
|
||
|
int mind = block[0].r * v_r + block[0].g * v_g + block[0].b * v_b;
|
||
|
int maxd = mind;
|
||
|
color_quad_u8 minp(block[0]);
|
||
|
color_quad_u8 maxp(block[0]);
|
||
|
|
||
|
for(uint i = 1; i < n; i++)
|
||
|
{
|
||
|
int dot = block[i].r * v_r + block[i].g * v_g + block[i].b * v_b;
|
||
|
|
||
|
if (dot < mind)
|
||
|
{
|
||
|
mind = dot;
|
||
|
minp = block[i];
|
||
|
}
|
||
|
|
||
|
if (dot > maxd)
|
||
|
{
|
||
|
maxd = dot;
|
||
|
maxp = block[i];
|
||
|
}
|
||
|
}
|
||
|
|
||
|
max16 = pack_color(maxp);
|
||
|
min16 = pack_color(minp);
|
||
|
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
// The refinement function. (Clever code, part 2)
|
||
|
// Tries to optimize colors to suit block contents better.
|
||
|
// (By solving a least squares system via normal equations+Cramer's rule)
|
||
|
static bool refine_block(uint n, const color_quad_u8 *block, uint &max16, uint &min16, const uint8* pSelectors)
|
||
|
{
|
||
|
static const int w1Tab[4] = { 3,0,2,1 };
|
||
|
|
||
|
static const int prods_0[4] = { 0x00,0x00,0x02,0x02 };
|
||
|
static const int prods_1[4] = { 0x00,0x09,0x01,0x04 };
|
||
|
static const int prods_2[4] = { 0x09,0x00,0x04,0x01 };
|
||
|
|
||
|
double akku_0 = 0;
|
||
|
double akku_1 = 0;
|
||
|
double akku_2 = 0;
|
||
|
double At1_r, At1_g, At1_b;
|
||
|
double At2_r, At2_g, At2_b;
|
||
|
|
||
|
At1_r = At1_g = At1_b = 0;
|
||
|
At2_r = At2_g = At2_b = 0;
|
||
|
for(uint i = 0; i < n; i++)
|
||
|
{
|
||
|
double r = block[i].r;
|
||
|
double g = block[i].g;
|
||
|
double b = block[i].b;
|
||
|
int step = pSelectors[i];
|
||
|
|
||
|
int w1 = w1Tab[step];
|
||
|
|
||
|
akku_0 += prods_0[step];
|
||
|
akku_1 += prods_1[step];
|
||
|
akku_2 += prods_2[step];
|
||
|
At1_r += w1*r;
|
||
|
At1_g += w1*g;
|
||
|
At1_b += w1*b;
|
||
|
At2_r += r;
|
||
|
At2_g += g;
|
||
|
At2_b += b;
|
||
|
}
|
||
|
|
||
|
At2_r = 3*At2_r - At1_r;
|
||
|
At2_g = 3*At2_g - At1_g;
|
||
|
At2_b = 3*At2_b - At1_b;
|
||
|
|
||
|
double xx = akku_2;
|
||
|
double yy = akku_1;
|
||
|
double xy = akku_0;
|
||
|
|
||
|
double t = xx * yy - xy * xy;
|
||
|
if (!yy || !xx || (fabs(t) < .0000125f))
|
||
|
return false;
|
||
|
|
||
|
double frb = (3.0f * 31.0f / 255.0f) / t;
|
||
|
double fg = frb * (63.0f / 31.0f);
|
||
|
|
||
|
uint oldMin = min16;
|
||
|
uint oldMax = max16;
|
||
|
|
||
|
// solve.
|
||
|
max16 = math::clamp<int>(static_cast<int>((At1_r*yy - At2_r*xy)*frb+0.5f),0,31) << 11;
|
||
|
max16 |= math::clamp<int>(static_cast<int>((At1_g*yy - At2_g*xy)*fg +0.5f),0,63) << 5;
|
||
|
max16 |= math::clamp<int>(static_cast<int>((At1_b*yy - At2_b*xy)*frb+0.5f),0,31) << 0;
|
||
|
|
||
|
min16 = math::clamp<int>(static_cast<int>((At2_r*xx - At1_r*xy)*frb+0.5f),0,31) << 11;
|
||
|
min16 |= math::clamp<int>(static_cast<int>((At2_g*xx - At1_g*xy)*fg +0.5f),0,63) << 5;
|
||
|
min16 |= math::clamp<int>(static_cast<int>((At2_b*xx - At1_b*xy)*frb+0.5f),0,31) << 0;
|
||
|
|
||
|
return (oldMin != min16) || (oldMax != max16);
|
||
|
}
|
||
|
|
||
|
// false if all selectors equal
|
||
|
static bool determine_selectors(uint n, const color_quad_u8* block, uint min16, uint max16, uint8* pSelectors)
|
||
|
{
|
||
|
color_quad_u8 color[4];
|
||
|
|
||
|
if (max16 != min16)
|
||
|
{
|
||
|
eval_colors(color, min16, max16);
|
||
|
|
||
|
return match_block_colors(n, block, color, pSelectors);
|
||
|
}
|
||
|
|
||
|
memset(pSelectors, 0, n);
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
// Returns the sum of squared r/g/b differences between each of the n pixels
// in `block` and the palette entry it would be assigned for the packed
// endpoints min16 / max16. Selectors are derived with the same projection
// test as match_block_colors(); if both endpoints are equal, palette entry 0
// is used for every pixel.
//
// The accumulation stops as soon as the running total reaches
// early_out_error, so a return value >= early_out_error is only a lower
// bound on the true error.
static uint64 determine_error(uint n, const color_quad_u8* block, uint min16, uint max16, uint64 early_out_error)
{
   color_quad_u8 color[4];

   eval_colors(color, min16, max16);

   int dirr = color[0].r - color[1].r;
   int dirg = color[0].g - color[1].g;
   int dirb = color[0].b - color[1].b;

   int stops[4];
   for(int i = 0; i < 4; i++)
      stops[i] = color[i].r*dirr + color[i].g*dirg + color[i].b*dirb;

   // Palette order along the axis, from highest to lowest projection: 0 2 3 1
   int c0Point = stops[1] + stops[3];
   int halfPoint = stops[3] + stops[2];
   int c3Point = stops[2] + stops[0];

   c0Point >>= 1;
   halfPoint >>= 1;
   c3Point >>= 1;

   uint64 total_error = 0;

   for (uint i = 0; i < n; i++)
   {
      const color_quad_u8& a = block[i];

      uint s = 0;
      if (min16 != max16)
      {
         int dot = a.r*dirr + a.g*dirg + a.b*dirb;

         if (dot < halfPoint)
            s = (dot < c0Point) ? 1 : 3;
         else
            s = (dot < c3Point) ? 2 : 0;
      }

      const color_quad_u8& b = color[s];

      int e = a[0] - b[0];
      total_error += e * e;

      e = a[1] - b[1];
      total_error += e * e;

      e = a[2] - b[2];
      total_error += e * e;

      if (total_error >= early_out_error)
         break;
   }

   return total_error;
}
|
||
|
|
||
|
// Local search that nudges individual endpoint components by +/-1.
//
// For up to two passes: for each channel with non-zero error, and for each
// of the two endpoints, try the component value one step below and one step
// above the current one. A candidate is evaluated with the *current*
// selectors; if it lowers that channel's squared error it is accepted, the
// packed endpoint is rewritten, selectors are recomputed with
// determine_selectors(), and the per-channel errors are recomputed.
//
// low16 / high16 / pSelectors are updated in place.
// Returns true if any candidate was accepted.
static bool refine_endpoints(uint n, const color_quad_u8* pBlock, uint& low16, uint& high16, uint8* pSelectors)
{
   bool optimized = false;

   // Largest valid component value per channel (5, 6, 5 bits).
   const int limits[3] = { 31, 63, 31 };

   for (uint trial = 0; trial < 2; trial++)
   {
      color_quad_u8 color[4];
      eval_colors(color, low16, high16);

      // Squared error per channel for the current endpoints/selectors.
      uint64 total_error[3] = { 0, 0, 0 };

      for (uint i = 0; i < n; i++)
      {
         const color_quad_u8& a = pBlock[i];

         const uint s = pSelectors[i];
         const color_quad_u8& b = color[s];

         int e = a[0] - b[0];
         total_error[0] += e * e;

         e = a[1] - b[1];
         total_error[1] += e * e;

         e = a[2] - b[2];
         total_error[2] += e * e;
      }

      // NOTE(review): the bool argument presumably selects raw 5/6-bit
      // components (false) vs. components scaled to 8 bits (true) - confirm
      // against dxt1_block::unpack_color.
      color_quad_u8 endpoints[2];
      endpoints[0] = dxt1_block::unpack_color((uint16)low16, false);
      endpoints[1] = dxt1_block::unpack_color((uint16)high16, false);

      color_quad_u8 expanded_endpoints[2];
      expanded_endpoints[0] = dxt1_block::unpack_color((uint16)low16, true);
      expanded_endpoints[1] = dxt1_block::unpack_color((uint16)high16, true);

      bool trial_optimized = false;

      for (uint axis = 0; axis < 3; axis++)
      {
         if (!total_error[axis])
            continue;

         const sU8* const pExpand = (axis == 1) ? ryg_dxt::Expand6 : ryg_dxt::Expand5;

         for (uint e = 0; e < 2; e++)
         {
            // v[0], v[1]: the two endpoint values on this channel;
            // v[2], v[3]: the interpolated palette values.
            uint v[4];
            v[e^1] = expanded_endpoints[e^1][axis];

            for (int t = -1; t <= 1; t += 2)
            {
               int cand = endpoints[e][axis] + t;
               if ((cand < 0) || (cand > limits[axis]))
                  continue;

               v[e] = pExpand[cand];

               //int delta = v[1] - v[0];
               //v[2] = v[0] + mul_8bit(delta, 0x55);
               //v[3] = v[0] + mul_8bit(delta, 0xAA);

               v[2] = (v[0] * 2 + v[1]) / 3;
               v[3] = (v[0] + v[1] * 2) / 3;

               uint64 axis_error = 0;

               for (uint i = 0; i < n; i++)
               {
                  const color_quad_u8& p = pBlock[i];

                  int diff = v[pSelectors[i]] - p[axis];

                  axis_error += diff * diff;

                  // Already no better than the current error: stop early.
                  if (axis_error >= total_error[axis])
                     break;
               }

               if (axis_error < total_error[axis])
               {
                  //total_error[axis] = axis_error;

                  endpoints[e][axis] = (uint8)cand;
                  expanded_endpoints[e][axis] = (uint8)v[e];

                  if (e)
                     high16 = dxt1_block::pack_color(endpoints[1], false);
                  else
                     low16 = dxt1_block::pack_color(endpoints[0], false);

                  determine_selectors(n, pBlock, low16, high16, pSelectors);

                  eval_colors(color, low16, high16);

                  // Selectors may have changed, so recompute all channel errors.
                  utils::zero_object(total_error);

                  for (uint i = 0; i < n; i++)
                  {
                     const color_quad_u8& px = pBlock[i];

                     const uint s = pSelectors[i];
                     const color_quad_u8& q = color[s];

                     int d = px[0] - q[0];
                     total_error[0] += d * d;

                     d = px[1] - q[1];
                     total_error[1] += d * d;

                     d = px[2] - q[2];
                     total_error[2] += d * d;
                  }

                  trial_optimized = true;
               }

            } // t

         } // e
      } // axis

      if (!trial_optimized)
         break;

      optimized = true;

   } // for ( ; ; )

   return optimized;
}
|
||
|
|
||
|
static void refine_endpoints2(uint n, const color_quad_u8* pBlock, uint& low16, uint& high16, uint8* pSelectors, float axis[3])
|
||
|
{
|
||
|
uint64 orig_error = determine_error(n, pBlock, low16, high16, cUINT64_MAX);
|
||
|
if (!orig_error)
|
||
|
return;
|
||
|
|
||
|
float l = 1.0f / sqrt(axis[0]*axis[0] + axis[1]*axis[1] + axis[2]*axis[2]);
|
||
|
vec3F principle_axis(axis[0] * l, axis[1] * l, axis[2] * l);
|
||
|
|
||
|
const float dist_per_trial = 0.027063293f;
|
||
|
|
||
|
const uint cMaxProbeRange = 8;
|
||
|
uint probe_low[cMaxProbeRange * 2 + 1];
|
||
|
uint probe_high[cMaxProbeRange * 2 + 1];
|
||
|
|
||
|
int probe_range = 8;
|
||
|
uint num_iters = 4;
|
||
|
|
||
|
const uint num_trials = probe_range * 2 + 1;
|
||
|
|
||
|
vec3F scaled_principle_axis(principle_axis * dist_per_trial);
|
||
|
scaled_principle_axis[0] *= 31.0f;
|
||
|
scaled_principle_axis[1] *= 63.0f;
|
||
|
scaled_principle_axis[2] *= 31.0f;
|
||
|
vec3F initial_ofs(scaled_principle_axis * (float)-probe_range);
|
||
|
initial_ofs[0] += .5f;
|
||
|
initial_ofs[1] += .5f;
|
||
|
initial_ofs[2] += .5f;
|
||
|
|
||
|
uint64 cur_error = orig_error;
|
||
|
|
||
|
for (uint iter = 0; iter < num_iters; iter++)
|
||
|
{
|
||
|
color_quad_u8 endpoints[2];
|
||
|
|
||
|
endpoints[0] = dxt1_block::unpack_color((uint16)low16, false);
|
||
|
endpoints[1] = dxt1_block::unpack_color((uint16)high16, false);
|
||
|
|
||
|
vec3F low_color(endpoints[0][0], endpoints[0][1], endpoints[0][2]);
|
||
|
vec3F high_color(endpoints[1][0], endpoints[1][1], endpoints[1][2]);
|
||
|
|
||
|
vec3F probe_low_color(low_color + initial_ofs);
|
||
|
for (uint i = 0; i < num_trials; i++)
|
||
|
{
|
||
|
int r = math::clamp((int)floor(probe_low_color[0]), 0, 31);
|
||
|
int g = math::clamp((int)floor(probe_low_color[1]), 0, 63);
|
||
|
int b = math::clamp((int)floor(probe_low_color[2]), 0, 31);
|
||
|
probe_low[i] = b | (g << 5U) | (r << 11U);
|
||
|
|
||
|
probe_low_color += scaled_principle_axis;
|
||
|
}
|
||
|
|
||
|
vec3F probe_high_color(high_color + initial_ofs);
|
||
|
for (uint i = 0; i < num_trials; i++)
|
||
|
{
|
||
|
int r = math::clamp((int)floor(probe_high_color[0]), 0, 31);
|
||
|
int g = math::clamp((int)floor(probe_high_color[1]), 0, 63);
|
||
|
int b = math::clamp((int)floor(probe_high_color[2]), 0, 31);
|
||
|
probe_high[i] = b | (g << 5U) | (r << 11U);
|
||
|
|
||
|
probe_high_color += scaled_principle_axis;
|
||
|
}
|
||
|
|
||
|
uint best_l = low16;
|
||
|
uint best_h = high16;
|
||
|
|
||
|
enum { cMaxHash = 4 };
|
||
|
uint64 hash[cMaxHash];
|
||
|
for (uint i = 0; i < cMaxHash; i++)
|
||
|
hash[i] = 0;
|
||
|
|
||
|
uint c = best_l | (best_h << 16);
|
||
|
c = fast_hash(&c, sizeof(c));
|
||
|
hash[(c >> 6) & 3] = 1ULL << (c & 63);
|
||
|
|
||
|
for (uint i = 0; i < num_trials; i++)
|
||
|
{
|
||
|
for (uint j = 0; j < num_trials; j++)
|
||
|
{
|
||
|
uint l = probe_low[i];
|
||
|
uint h = probe_high[j];
|
||
|
if (l < h)
|
||
|
utils::swap(l, h);
|
||
|
|
||
|
uint c = l | (h << 16);
|
||
|
c = fast_hash(&c, sizeof(c));
|
||
|
uint64 mask = 1ULL << (c & 63);
|
||
|
uint ofs = (c >> 6) & 3;
|
||
|
if (hash[ofs] & mask)
|
||
|
continue;
|
||
|
|
||
|
hash[ofs] |= mask;
|
||
|
|
||
|
uint64 new_error = determine_error(n, pBlock, l, h, cur_error);
|
||
|
if (new_error < cur_error)
|
||
|
{
|
||
|
best_l = l;
|
||
|
best_h = h;
|
||
|
cur_error = new_error;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
bool improved = false;
|
||
|
|
||
|
if ((best_l != low16) || (best_h != high16))
|
||
|
{
|
||
|
low16 = best_l;
|
||
|
high16 = best_h;
|
||
|
|
||
|
determine_selectors(n, pBlock, low16, high16, pSelectors);
|
||
|
improved = true;
|
||
|
}
|
||
|
|
||
|
if (refine_endpoints(n, pBlock, low16, high16, pSelectors))
|
||
|
{
|
||
|
improved = true;
|
||
|
|
||
|
uint64 cur_error = determine_error(n, pBlock, low16, high16, cUINT64_MAX);
|
||
|
if (!cur_error)
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
if (!improved)
|
||
|
break;
|
||
|
|
||
|
} // iter
|
||
|
|
||
|
//uint64 end_error = determine_error(n, pBlock, low16, high16, UINT64_MAX);
|
||
|
//if (end_error > orig_error) DebugBreak();
|
||
|
}
|
||
|
|
||
|
// Encodes a block that is treated as a single color (ave_color = r, g, b).
// Endpoints come from the ryg_dxt::OMatch5 / OMatch6 tables indexed by the
// channel value (entry [0] -> low16, entry [1] -> high16), and all n
// selectors are set to 2.
static void compress_solid_block(uint n, uint ave_color[3], uint& low16, uint& high16, uint8* pSelectors)
{
   uint r = ave_color[0];
   uint g = ave_color[1];
   uint b = ave_color[2];

   memset(pSelectors, 2, n);

   low16 = (ryg_dxt::OMatch5[r][0]<<11) | (ryg_dxt::OMatch6[g][0]<<5) | ryg_dxt::OMatch5[b][0];
   high16 = (ryg_dxt::OMatch5[r][1]<<11) | (ryg_dxt::OMatch6[g][1]<<5) | ryg_dxt::OMatch5[b][1];
}
|
||
|
|
||
|
// Compresses the r/g/b of n pixels into two packed 5:6:5 endpoints and one
// 2-bit selector per pixel.
//
//   n          number of pixels; asserted to be a multiple of 16
//   block      input pixels
//   low16      receives the first endpoint; on return low16 >= high16
//   high16     receives the second endpoint
//   pSelectors receives n selector values (0..3)
//   refine     when true, additionally runs the slower refine_endpoints2()
//
// Blocks that are a single color, or that end up with all-equal selectors,
// are encoded with compress_solid_block().
void compress_color_block(uint n, const color_quad_u8* block, uint& low16, uint& high16, uint8* pSelectors, bool refine)
{
   CRNLIB_ASSERT((n & 15) == 0);

   uint ave_color[3];
   float axis[3];

   if (!optimize_block_colors(n, block, low16, high16, ave_color, axis))
   {
      compress_solid_block(n, ave_color, low16, high16, pSelectors);
   }
   else
   {
      if (!determine_selectors(n, block, low16, high16, pSelectors))
         compress_solid_block(n, ave_color, low16, high16, pSelectors);
      else
      {
         // Least-squares refit; selectors must be recomputed if endpoints moved.
         if (refine_block(n, block, low16, high16, pSelectors))
            determine_selectors(n, block, low16, high16, pSelectors);

         if (refine)
            refine_endpoints2(n, block, low16, high16, pSelectors, axis);
      }
   }

   // Keep low16 >= high16. Swapping the endpoints exchanges palette entries
   // 0 <-> 1 and 2 <-> 3, which is exactly what flipping bit 0 of each
   // selector does.
   if (low16 < high16)
   {
      utils::swap(low16, high16);
      for (uint i = 0; i < n; i++)
         pSelectors[i] ^= 1;
   }
}
|
||
|
|
||
|
// Compresses 16 pixels from pBlock into *pDXT1_block: stores the two 16-bit
// endpoints and packs the 16 two-bit selectors into m_selectors[0..3], with
// pixel 0 in the least significant bits of m_selectors[0].
void compress_color_block(dxt1_block* pDXT1_block, const color_quad_u8* pBlock, bool refine)
{
   uint8 color_selectors[16];
   uint low16, high16;
   dxt_fast::compress_color_block(16, pBlock, low16, high16, color_selectors, refine);

   pDXT1_block->set_low_color(static_cast<uint16>(low16));
   pDXT1_block->set_high_color(static_cast<uint16>(high16));

   // Build the 32-bit selector word from the last pixel down to the first.
   uint mask = 0;
   for (int i = 15; i >= 0; i--)
   {
      mask <<= 2;
      mask |= color_selectors[i];
   }

   pDXT1_block->m_selectors[0] = (uint8)(mask & 0xFF);
   pDXT1_block->m_selectors[1] = (uint8)((mask >> 8) & 0xFF);
   pDXT1_block->m_selectors[2] = (uint8)((mask >> 16) & 0xFF);
   pDXT1_block->m_selectors[3] = (uint8)((mask >> 24) & 0xFF);
}
|
||
|
|
||
|
// Compresses one component (comp_index) of n pixels into two 8-bit endpoints
// and one 3-bit selector per pixel.
//
//   low8       receives the maximum component value in the block
//   high8      receives the minimum component value in the block
//   pSelectors receives n selector values (0..7)
//
// Requires n > 0 (block[0] is read unconditionally).
void compress_alpha_block(uint n, const color_quad_u8* block, uint& low8, uint& high8, uint8* pSelectors, uint comp_index)
{
   int min, max;
   min = max = block[0][comp_index];

   for (uint i = 1; i < n; i++)
   {
      min = math::minimum<int>(min, block[i][comp_index]);
      max = math::maximum<int>(max, block[i][comp_index]);
   }

   low8 = max;
   high8 = min;

   int dist = max-min;
   int bias = min*7 - (dist >> 1);
   int dist4 = dist*4;
   int dist2 = dist*2;

   for (uint i = 0; i < n; i++)
   {
      // Value scaled by 7 and offset so that comparing against dist*4,
      // dist*2 and dist yields a 3-bit linear index.
      int a = block[i][comp_index]*7 - bias;
      int ind,t;

      // Branchless binary search: t is used as an all-ones mask when
      // (x - a) is negative. This assumes a 32-bit int and an arithmetic
      // right shift of negative values.
      t = (dist4 - a) >> 31; ind = t & 4; a -= dist4 & t;
      t = (dist2 - a) >> 31; ind += t & 2; a -= dist2 & t;
      t = (dist - a) >> 31; ind += t & 1;

      // Convert the linear index to the stored selector ordering.
      // NOTE(review): presumably the DXT5 8-value alpha ordering, where
      // selectors 0 and 1 name the two endpoints - confirm against the
      // format spec.
      ind = -ind & 7;
      ind ^= (2 > ind);

      pSelectors[i] = static_cast<uint8>(ind);
   }
}
|
||
|
|
||
|
// Compresses component comp_index of 16 pixels from pBlock into
// *pDXT5_block: stores the two 8-bit endpoints and packs the 16 three-bit
// selectors (48 bits) into the 6 bytes starting at m_selectors, with pixel 0
// in the least significant bits of the first byte.
void compress_alpha_block(dxt5_block* pDXT5_block, const color_quad_u8* pBlock, uint comp_index)
{
   uint8 selectors[16];
   uint low8, high8;

   compress_alpha_block(16, pBlock, low8, high8, selectors, comp_index);

   pDXT5_block->set_low_alpha(low8);
   pDXT5_block->set_high_alpha(high8);

   // `mask` is a small bit accumulator; `bits` counts the valid bits in it.
   uint mask = 0;
   uint bits = 0;
   uint8* pDst = pDXT5_block->m_selectors;

   for (uint i = 0; i < 16; i++)
   {
      mask |= (selectors[i] << bits);

      // Flush a full byte as soon as one is available.
      if ((bits += 3) >= 8)
      {
         *pDst++ = static_cast<uint8>(mask);
         mask >>= 8;
         bits -= 8;
      }
   }
}
|
||
|
|
||
|
// Finds two colors that represent the n pixels of pBlock by splitting them
// into two clusters.
//
// Steps:
//  1. Compute the rounded average color.
//  2. Seed `lo` with the pixel furthest from the average and `hi` with the
//     pixel furthest from that one, then pull both halfway to the average.
//  3. Up to cMaxIters times: assign each pixel to the nearer seed (by
//     projecting onto hi - lo and comparing with the midpoint), and replace
//     each seed by the rounded mean of its cluster. Stops early if the seeds
//     coincide, a cluster is empty, or nothing changed.
//  4. Order the result so that `lo` has the smaller r*r + g*g + b*b.
//
// Only components 0..2 are processed; the alpha of lo/hi is whatever the
// seed pixels carried. Requires n > 0 (n is used as a divisor).
void find_representative_colors(uint n, const color_quad_u8* pBlock, color_quad_u8& lo, color_quad_u8& hi)
{
   uint64 ave64[3];
   ave64[0] = 0;
   ave64[1] = 0;
   ave64[2] = 0;

   for (uint i = 0; i < n; i++)
   {
      ave64[0] += pBlock[i].r;
      ave64[1] += pBlock[i].g;
      ave64[2] += pBlock[i].b;
   }

   uint ave[3];
   ave[0] = static_cast<uint>((ave64[0] + (n / 2)) / n);
   ave[1] = static_cast<uint>((ave64[1] + (n / 2)) / n);
   ave[2] = static_cast<uint>((ave64[2] + (n / 2)) / n);

   // Pixel furthest from the average.
   int furthest_dist = -1;
   uint furthest_index = 0;
   for (uint i = 0; i < n; i++)
   {
      int r = pBlock[i].r - ave[0];
      int g = pBlock[i].g - ave[1];
      int b = pBlock[i].b - ave[2];
      int dist = r*r + g*g + b*b;
      if (dist > furthest_dist)
      {
         furthest_dist = dist;
         furthest_index = i;
      }
   }

   color_quad_u8 lo_color(pBlock[furthest_index]);

   // Pixel furthest from lo_color.
   int opp_dist = -1;
   uint opp_index = 0;
   for (uint i = 0; i < n; i++)
   {
      int r = pBlock[i].r - lo_color.r;
      int g = pBlock[i].g - lo_color.g;
      int b = pBlock[i].b - lo_color.b;
      int dist = r*r + g*g + b*b;
      if (dist > opp_dist)
      {
         opp_dist = dist;
         opp_index = i;
      }
   }

   color_quad_u8 hi_color(pBlock[opp_index]);

   // Move both seeds halfway towards the average.
   for (uint i = 0; i < 3; i++)
   {
      lo_color[i] = static_cast<uint8>((lo_color[i] + ave[i]) >> 1);
      hi_color[i] = static_cast<uint8>((hi_color[i] + ave[i]) >> 1);
   }

   const uint cMaxIters = 4;
   for (uint iter_index = 0; iter_index < cMaxIters; iter_index++)
   {
      if ((lo_color[0] == hi_color[0]) && (lo_color[1] == hi_color[1]) && (lo_color[2] == hi_color[2]))
         break;

      uint64 new_color[2][3];
      uint weight[2];

      utils::zero_object(new_color);
      utils::zero_object(weight);

      int vec_r = hi_color[0] - lo_color[0];
      int vec_g = hi_color[1] - lo_color[1];
      int vec_b = hi_color[2] - lo_color[2];

      int lo_dot = vec_r * lo_color[0] + vec_g * lo_color[1] + vec_b * lo_color[2];
      int hi_dot = vec_r * hi_color[0] + vec_g * hi_color[1] + vec_b * hi_color[2];
      // Twice the midpoint projection; the axis is doubled below to match.
      int mid_dot = lo_dot + hi_dot;

      vec_r *= 2;
      vec_g *= 2;
      vec_b *= 2;

      for (uint i = 0; i < n; i++)
      {
         const color_quad_u8& c = pBlock[i];

         const int dot = c[0] * vec_r + c[1] * vec_g + c[2] * vec_b;
         // 0 -> lo cluster, 1 -> hi cluster.
         const uint match_index = (dot > mid_dot);

         new_color[match_index][0] += c.r;
         new_color[match_index][1] += c.g;
         new_color[match_index][2] += c.b;
         weight[match_index]++;
      }

      if ((!weight[0]) || (!weight[1]))
         break;

      uint8 new_color8[2][3];

      for (uint j = 0; j < 2; j++)
         for (uint i = 0; i < 3; i++)
            new_color8[j][i] = static_cast<uint8>((new_color[j][i] + (weight[j] / 2)) / weight[j]);

      // Converged: cluster means equal the current seeds.
      if ((new_color8[0][0] == lo_color[0]) && (new_color8[0][1] == lo_color[1]) && (new_color8[0][2] == lo_color[2]) &&
          (new_color8[1][0] == hi_color[0]) && (new_color8[1][1] == hi_color[1]) && (new_color8[1][2] == hi_color[2]))
         break;

      for (uint i = 0; i < 3; i++)
      {
         lo_color[i] = new_color8[0][i];
         hi_color[i] = new_color8[1][i];
      }
   }

   uint energy[2] = { 0, 0 };
   for (uint i = 0; i < 3; i++)
   {
      energy[0] += lo_color[i] * lo_color[i];
      energy[1] += hi_color[i] * hi_color[i];
   }

   if (energy[0] > energy[1])
      utils::swap(lo_color, hi_color);

   lo = lo_color;
   hi = hi_color;
}
|
||
|
|
||
|
} // namespace dxt_fast
|
||
|
|
||
|
} // namespace crnlib
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
|