
884 lines
25 KiB

// Project64 - A Nintendo 64 emulator
// Copyright(C) 2001-2021 Project64
// Copyright(C) 2007 Hiroshi Morii
// Copyright(C) 2004 Daniel Borca
// GNU/GPLv2 licensed:
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <stdio.h>
#include <stdint.h>
#include "types.h"
#include "internal.h"
#include "dxtn.h"
/*
 * DXTn encoder
 *
 * The encoder was built by reversing the decoder and is loosely based on
 * the FXT1 codec. Note that this code is merely a proof of concept, since
 * it is highly unoptimized!
 */
#define MAX_COMP 4 // Largest number of components per texel ever needed (second dimension of the per-block input arrays)
#define MAX_VECT 4 // Largest number of base vectors ever needed
#define N_TEXELS 16 // Number of texels in one block (always 16, i.e. 4x4)
// Pack texel v into a 16-bit 5:6:5 value: the top five bits of v[RCOMP] go to
// bits 15..11, the top six bits of v[GCOMP] to bits 10..5 and the top five
// bits of v[BCOMP] to bits 4..0. v is evaluated three times.
#define COLOR565(v) (word)((((v)[RCOMP] & 0xf8) << 8) | (((v)[GCOMP] & 0xfc) << 3) | ((v)[BCOMP] >> 3))
/*
 * Translate an interpolation index (position along the color0 -> color1
 * line, as produced by the quantizers) into the 2-bit DXT color code.
 *
 * Row 0 is used when four colors are available (color0 > color1):
 *   index 0 -> code 0 (color0), 1 -> code 2, 2 -> code 3, 3 -> code 1 (color1).
 * Row 1 is used when code 3 is reserved for black/transparent:
 *   index 0 -> code 0 (color0), 1 -> code 2 (midpoint), 2 -> code 1 (color1),
 *   3 -> code 3.
 */
static const int dxtn_color_tlat[2][4] = {
    { 0, 2, 3, 1 },
    { 0, 2, 1, 3 }
};
/*
 * Translate an alpha interpolation index (position along the
 * alpha0 -> alpha1 line) into the 3-bit DXT5 alpha code.
 *
 * Row 0 is the eight-value mode: index 0 -> code 0 (alpha0),
 * indices 1..6 -> codes 2..7, index 7 -> code 1 (alpha1).
 * Row 1 is the six-value mode: index 0 -> code 0 (alpha0),
 * indices 1..4 -> codes 2..5, index 5 -> code 1 (alpha1), and
 * indices 6 and 7 map to themselves (the codes the decoder turns into
 * alpha 0 and alpha 255).
 */
static const int dxtn_alpha_tlat[2][8] = {
    { 0, 2, 3, 4, 5, 6, 7, 1 },
    { 0, 2, 3, 4, 5, 1, 6, 7 }
};
/*
 * Encode one 4x4 block of texels as opaque DXT1.
 *
 * cc    - output block. cc[0] receives the two 5:6:5 endpoints (color0 in
 *         the low 16 bits, color1 in the high 16 bits); cc[1] receives the
 *         sixteen 2-bit color codes, texel 0 in the least significant bits.
 * lines - four row pointers, one per texel row. The pointers stored in this
 *         array are advanced past the texels that are consumed.
 * comps - number of bytes read per texel; must not exceed MAX_COMP.
 *
 * MAKEIVEC and CALCCDOT are defined outside this file section; they are
 * presumably the helpers that build the interpolation vector from the two
 * endpoint texels and project a texel onto it - TODO confirm in internal.h.
 */
static void
dxt1_rgb_quantize(dword *cc, const byte *lines[], int comps)
{
    float b, iv[MAX_COMP]; // Interpolation vector
    dword hi; // High doubleword (the color codes)
    int color0, color1;
    int n_vect;
    const int n_comp = 3;
    int black = 0;
#ifndef YUV
    int minSum = 2000; // Big enough: three 8-bit components sum to at most 765
#else
    int minSum = 2000000; // Big enough: weighted sum is at most 1000 * 255
#endif
    int maxSum = -1; // Small enough
    int minCol = 0; // phoudoin: Silent compiler!
    int maxCol = 0; // phoudoin: Silent compiler!
    byte input[N_TEXELS][MAX_COMP];
    int i, k, l;

    // The block is treated as opaque: ACOMP of a texel is never read in
    // this function, so it does not need to be initialised.

    // Gather the block, 4 texels from each line
#ifndef ARGB
    for (l = 0; l < 4; l++) {
        for (k = 0; k < 4; k++) {
            for (i = 0; i < comps; i++) {
                input[k + l * 4][i] = *lines[l]++;
            }
        }
    }
#else
    // H.Morii - Support for ARGB inputs: the first three bytes of each
    // texel are stored in reverse order
    for (l = 0; l < 4; l++) {
        for (k = 0; k < 4; k++) {
            input[k + l * 4][2] = *lines[l]++;
            input[k + l * 4][1] = *lines[l]++;
            input[k + l * 4][0] = *lines[l]++;
            if (comps == 4) input[k + l * 4][3] = *lines[l]++;
        }
    }
#endif

    /*
     * Our solution here is to find the darkest and brightest colors in
     * the 4x4 tile and use those as the two representative colors.
     * There are probably better algorithms to use (histogram-based).
     */
    for (k = 0; k < N_TEXELS; k++) {
        int sum = 0;
#ifndef YUV
        for (i = 0; i < n_comp; i++) {
            sum += input[k][i];
        }
#else
        /*
         * RGB to YUV conversion according to CCIR 601 specs
         * Y = 0.299R+0.587G+0.114B
         * U = 0.713(R - Y) = 0.500R-0.419G-0.081B
         * V = 0.564(B - Y) = -0.169R-0.331G+0.500B
         */
        sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP];
#endif
        if (minSum > sum) {
            minSum = sum;
            minCol = k;
        }
        if (maxSum < sum) {
            maxSum = sum;
            maxCol = k;
        }
        if (sum == 0) {
            // At least one texel is pure black
            black = 1;
        }
    }

    color0 = COLOR565(input[minCol]);
    color1 = COLOR565(input[maxCol]);

    if (color0 == color1) {
        // We'll use 3-vector. The endpoints are widened before shifting so
        // that a 16-bit color1 cannot overflow a signed int.
        cc[0] = (dword)color0 | ((dword)color1 << 16);
        // All texels get code 3 when black is present, code 0 otherwise
        hi = black ? -1 : 0;
    }
    else {
        if (black && ((color0 == 0) || (color1 == 0))) {
            // We still can use 4-vector: black is already an endpoint
            black = 0;
        }
        // Order the endpoints so that color0 <= color1 exactly when code 3
        // must be reserved for black
        if (black ^ (color0 <= color1)) {
            int aux;
            aux = color0;
            color0 = color1;
            color1 = aux;
            aux = minCol;
            minCol = maxCol;
            maxCol = aux;
        }
        n_vect = (color0 <= color1) ? 2 : 3;

        MAKEIVEC(n_vect, n_comp, iv, b, input[minCol], input[maxCol]);

        // Add in texels, last texel first so that texel 0 ends up in the
        // lowest two bits
        cc[0] = (dword)color0 | ((dword)color1 << 16);
        hi = 0;
        for (k = N_TEXELS - 1; k >= 0; k--) {
            int texel = 3;
            int sum = 0;
            if (black) {
                for (i = 0; i < n_comp; i++) {
                    sum += input[k][i];
                }
            }
            if (!black || sum) {
                // Interpolate color
                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
                texel = dxtn_color_tlat[black][texel];
            }
            // Add in texel
            hi <<= 2;
            hi |= texel;
        }
    }
    cc[1] = hi;
}
/*
 * Encode one 4x4 block of texels as DXT1 with 1-bit alpha.
 *
 * cc    - output block. cc[0] receives the two 5:6:5 endpoints (color0 in
 *         the low 16 bits, color1 in the high 16 bits); cc[1] receives the
 *         sixteen 2-bit color codes, texel 0 in the least significant bits.
 * lines - four row pointers, one per texel row. The pointers stored in this
 *         array are advanced past the texels that are consumed.
 * comps - number of bytes read per texel; must not exceed MAX_COMP. With
 *         comps == 3 every texel is treated as fully opaque.
 *
 * A texel whose alpha is below 128 is encoded as code 3 with the endpoints
 * ordered color0 <= color1, which dxt1_rgba_decode_1 documents as alpha 0.
 *
 * MAKEIVEC and CALCCDOT are defined outside this file section; they are
 * presumably the helpers that build the interpolation vector from the two
 * endpoint texels and project a texel onto it - TODO confirm in internal.h.
 */
static void
dxt1_rgba_quantize(dword *cc, const byte *lines[], int comps)
{
    float b, iv[MAX_COMP]; // Interpolation vector
    dword hi; // High doubleword (the color codes)
    int color0, color1;
    int n_vect;
    const int n_comp = 3;
    int transparent = 0;
#ifndef YUV
    int minSum = 2000; // Big enough: three 8-bit components sum to at most 765
#else
    int minSum = 2000000; // Big enough: weighted sum is at most 1000 * 255
#endif
    int maxSum = -1; // Small enough
    int minCol = 0; // phoudoin: Silent compiler!
    int maxCol = 0; // phoudoin: Silent compiler!
    byte input[N_TEXELS][MAX_COMP];
    int i, k, l;

    if (comps == 3) {
        // Make the whole block opaque (every byte, alpha included, is 0xFF)
        memset(input, -1, sizeof(input));
    }

    // Gather the block, 4 texels from each line
#ifndef ARGB
    for (l = 0; l < 4; l++) {
        for (k = 0; k < 4; k++) {
            for (i = 0; i < comps; i++) {
                input[k + l * 4][i] = *lines[l]++;
            }
        }
    }
#else
    // H.Morii - Support for ARGB inputs: the first three bytes of each
    // texel are stored in reverse order
    for (l = 0; l < 4; l++) {
        for (k = 0; k < 4; k++) {
            input[k + l * 4][2] = *lines[l]++;
            input[k + l * 4][1] = *lines[l]++;
            input[k + l * 4][0] = *lines[l]++;
            if (comps == 4) input[k + l * 4][3] = *lines[l]++;
        }
    }
#endif

    /*
     * Our solution here is to find the darkest and brightest colors in
     * the 4x4 tile and use those as the two representative colors.
     * There are probably better algorithms to use (histogram-based).
     */
    for (k = 0; k < N_TEXELS; k++) {
        int sum = 0;
#ifndef YUV
        for (i = 0; i < n_comp; i++) {
            sum += input[k][i];
        }
#else
        // Luma weighted by 1000 (0.299R + 0.587G + 0.114B)
        sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP];
#endif
        if (minSum > sum) {
            minSum = sum;
            minCol = k;
        }
        if (maxSum < sum) {
            maxSum = sum;
            maxCol = k;
        }
        if (input[k][ACOMP] < 128) {
            transparent = 1;
        }
    }

    color0 = COLOR565(input[minCol]);
    color1 = COLOR565(input[maxCol]);

    if (color0 == color1) {
        // We'll use 3-vector. The endpoints are widened before shifting so
        // that a 16-bit color1 cannot overflow a signed int.
        cc[0] = (dword)color0 | ((dword)color1 << 16);
        // Equal endpoints: code 0 reproduces the color, code 3 is
        // transparent. Only the texels that really are transparent get
        // code 3; marking the whole block would erase the opaque ones.
        hi = 0;
        if (transparent) {
            for (k = N_TEXELS - 1; k >= 0; k--) {
                hi <<= 2;
                if (input[k][ACOMP] < 128) {
                    hi |= 3;
                }
            }
        }
    }
    else {
        // Order the endpoints so that color0 <= color1 exactly when code 3
        // must be reserved for transparency
        if (transparent ^ (color0 <= color1)) {
            int aux;
            aux = color0;
            color0 = color1;
            color1 = aux;
            aux = minCol;
            minCol = maxCol;
            maxCol = aux;
        }
        n_vect = (color0 <= color1) ? 2 : 3;

        MAKEIVEC(n_vect, n_comp, iv, b, input[minCol], input[maxCol]);

        // Add in texels, last texel first so that texel 0 ends up in the
        // lowest two bits
        cc[0] = (dword)color0 | ((dword)color1 << 16);
        hi = 0;
        for (k = N_TEXELS - 1; k >= 0; k--) {
            int texel = 3;
            if (input[k][ACOMP] >= 128) {
                // Interpolate color
                CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
                texel = dxtn_color_tlat[transparent][texel];
            }
            // Add in texel
            hi <<= 2;
            hi |= texel;
        }
    }
    cc[1] = hi;
}
/*
 * Encode one 4x4 block of texels as DXT3 (explicit 4-bit alpha).
 *
 * cc    - output block of four dwords. cc[0] holds the 4-bit alphas of
 *         texels 0..7 and cc[1] those of texels 8..15 (lower-numbered texel
 *         in the lower bits); cc[2] holds the two 5:6:5 endpoints (color0
 *         in the low 16 bits); cc[3] holds the sixteen 2-bit color codes,
 *         texel 0 in the least significant bits.
 * lines - four row pointers, one per texel row. The pointers stored in this
 *         array are advanced past the texels that are consumed.
 * comps - number of bytes read per texel; must not exceed MAX_COMP. With
 *         comps == 3 every texel is treated as fully opaque.
 *
 * MAKEIVEC and CALCCDOT are defined outside this file section; they are
 * presumably the helpers that build the interpolation vector from the two
 * endpoint texels and project a texel onto it - TODO confirm in internal.h.
 */
static void
dxt3_rgba_quantize(dword *cc, const byte *lines[], int comps)
{
    float b, iv[MAX_COMP]; // Interpolation vector
    dword lolo, lohi; // Low quadword: lo DWORD, hi DWORD
    dword hihi; // High quadword: high DWORD
    int color0, color1;
    const int n_vect = 3;
    const int n_comp = 3;
#ifndef YUV
    int minSum = 2000; // Big enough: three 8-bit components sum to at most 765
#else
    int minSum = 2000000; // Big enough: weighted sum is at most 1000 * 255
#endif
    int maxSum = -1; // Small enough
    int minCol = 0; // phoudoin: Silent compiler!
    int maxCol = 0; // phoudoin: Silent compiler!
    byte input[N_TEXELS][MAX_COMP];
    int i, k, l;

    if (comps == 3) {
        // Make the whole block opaque (every byte, alpha included, is 0xFF)
        memset(input, -1, sizeof(input));
    }

    // Gather the block, 4 texels from each line
#ifndef ARGB
    for (l = 0; l < 4; l++) {
        for (k = 0; k < 4; k++) {
            for (i = 0; i < comps; i++) {
                input[k + l * 4][i] = *lines[l]++;
            }
        }
    }
#else
    // H.Morii - Support for ARGB inputs: the first three bytes of each
    // texel are stored in reverse order
    for (l = 0; l < 4; l++) {
        for (k = 0; k < 4; k++) {
            input[k + l * 4][2] = *lines[l]++;
            input[k + l * 4][1] = *lines[l]++;
            input[k + l * 4][0] = *lines[l]++;
            if (comps == 4) input[k + l * 4][3] = *lines[l]++;
        }
    }
#endif

    /*
     * TODO:
     * Our solution here is to find the darkest and brightest colors in
     * the 4x4 tile and use those as the two representative colors.
     * There are probably better algorithms to use (histogram-based).
     */
    for (k = 0; k < N_TEXELS; k++) {
        int sum = 0;
#ifndef YUV
        for (i = 0; i < n_comp; i++) {
            sum += input[k][i];
        }
#else
        // Luma weighted by 1000 (0.299R + 0.587G + 0.114B)
        sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP];
#endif
        if (minSum > sum) {
            minSum = sum;
            minCol = k;
        }
        if (maxSum < sum) {
            maxSum = sum;
            maxCol = k;
        }
    }

    // Add in alphas: the top four bits of each alpha, texels 15..8 into
    // the high dword and texels 7..0 into the low dword
    lolo = lohi = 0;
    for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) {
        // Add in alpha
        lohi <<= 4;
        lohi |= input[k][ACOMP] >> 4;
    }
    cc[1] = lohi;
    for (; k >= 0; k--) {
        // Add in alpha
        lolo <<= 4;
        lolo |= input[k][ACOMP] >> 4;
    }
    cc[0] = lolo;

    color0 = COLOR565(input[minCol]);
    color1 = COLOR565(input[maxCol]);

#ifdef RADEON
    /*
     * H.Morii - Workaround for ATI Radeon
     * According to the OpenGL EXT_texture_compression_s3tc specs,
     * the encoding of the RGB components for DXT3 and DXT5 formats
     * use the non-transparent encodings of DXT1 but treated as
     * though color0 > color1, regardless of the actual values of
     * color0 and color1. ATI Radeons however require the values to
     * be color0 > color1.
     */
    if (color0 < color1) {
        int aux;
        aux = color0;
        color0 = color1;
        color1 = aux;
        aux = minCol;
        minCol = maxCol;
        maxCol = aux;
    }
#endif

    // The endpoints are widened before shifting so that a 16-bit color1
    // cannot overflow a signed int
    cc[2] = (dword)color0 | ((dword)color1 << 16);

    hihi = 0;
    if (color0 != color1) {
        MAKEIVEC(n_vect, n_comp, iv, b, input[minCol], input[maxCol]);

        // Add in texels, last texel first so that texel 0 ends up in the
        // lowest two bits
        for (k = N_TEXELS - 1; k >= 0; k--) {
            int texel;
            // Interpolate color
            CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
            texel = dxtn_color_tlat[0][texel];
            // Add in texel
            hihi <<= 2;
            hihi |= texel;
        }
    }
    cc[3] = hihi;
}
/*
 * Encode one 4x4 block of texels as DXT5 (interpolated alpha).
 *
 * cc    - output block of four dwords. The first two dwords, written as one
 *         qword, hold alpha0 in bits 7..0, alpha1 in bits 15..8 and sixteen
 *         3-bit alpha codes above them (texel 0 lowest); cc[2] holds the
 *         two 5:6:5 endpoints (color0 in the low 16 bits); cc[3] holds the
 *         sixteen 2-bit color codes, texel 0 in the least significant bits.
 * lines - four row pointers, one per texel row. The pointers stored in this
 *         array are advanced past the texels that are consumed.
 * comps - number of bytes read per texel; must not exceed MAX_COMP. With
 *         comps == 3 every texel is treated as fully opaque.
 *
 * MAKEIVEC, CALCCDOT and the Q_MOV32 / Q_SHL / Q_OR32 helpers are defined
 * outside this file section; from their use here they presumably build the
 * color interpolation vector, project a texel onto it, and assign / shift /
 * OR a 64-bit value - TODO confirm in internal.h.
 */
static void
dxt5_rgba_quantize(dword *cc, const byte *lines[], int comps)
{
    float b, iv[MAX_COMP]; // Interpolation vector
    qword lo; // Low quadword (the alpha half of the block)
    dword hihi; // High quadword: high DWORD
    int color0, color1;
    const int n_vect = 3;
    const int n_comp = 3;
#ifndef YUV
    int minSum = 2000; // Big enough: three 8-bit components sum to at most 765
#else
    int minSum = 2000000; // Big enough: weighted sum is at most 1000 * 255
#endif
    int maxSum = -1; // Small enough
    int minCol = 0; // phoudoin: Silent compiler!
    int maxCol = 0; // phoudoin: Silent compiler!
    int alpha0 = 2000; // Big enough
    int alpha1 = -1; // Small enough
    int anyZero = 0, anyOne = 0;
    int a_vect;
    byte input[N_TEXELS][MAX_COMP];
    int i, k, l;

    if (comps == 3) {
        // Make the whole block opaque (every byte, alpha included, is 0xFF)
        memset(input, -1, sizeof(input));
    }

    // Gather the block, 4 texels from each line
#ifndef ARGB
    for (l = 0; l < 4; l++) {
        for (k = 0; k < 4; k++) {
            for (i = 0; i < comps; i++) {
                input[k + l * 4][i] = *lines[l]++;
            }
        }
    }
#else
    // H.Morii - Support for ARGB inputs: the first three bytes of each
    // texel are stored in reverse order
    for (l = 0; l < 4; l++) {
        for (k = 0; k < 4; k++) {
            input[k + l * 4][2] = *lines[l]++;
            input[k + l * 4][1] = *lines[l]++;
            input[k + l * 4][0] = *lines[l]++;
            if (comps == 4) input[k + l * 4][3] = *lines[l]++;
        }
    }
#endif

    /*
     * Our solution here is to find the darkest and brightest colors in
     * the 4x4 tile and use those as the two representative colors.
     * There are probably better algorithms to use (histogram-based).
     * The same pass records the smallest and largest alpha and whether
     * any texel has alpha exactly 0 or exactly 255.
     */
    for (k = 0; k < N_TEXELS; k++) {
        int sum = 0;
#ifndef YUV
        for (i = 0; i < n_comp; i++) {
            sum += input[k][i];
        }
#else
        // Luma weighted by 1000 (0.299R + 0.587G + 0.114B)
        sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP];
#endif
        if (minSum > sum) {
            minSum = sum;
            minCol = k;
        }
        if (maxSum < sum) {
            maxSum = sum;
            maxCol = k;
        }
        if (alpha0 > input[k][ACOMP]) {
            alpha0 = input[k][ACOMP];
        }
        if (alpha1 < input[k][ACOMP]) {
            alpha1 = input[k][ACOMP];
        }
        if (input[k][ACOMP] == 0) {
            anyZero = 1;
        }
        if (input[k][ACOMP] == 255) {
            anyOne = 1;
        }
    }

    // Add in alphas
    if (alpha0 == alpha1) {
        // We'll use 6-vector; every alpha code stays 0, i.e. alpha0
        cc[0] = alpha0 | (alpha1 << 8);
        cc[1] = 0;
    }
    else {
        if (anyZero && ((alpha0 == 0) || (alpha1 == 0))) {
            // We still might use 8-vector: 0 is already an endpoint
            anyZero = 0;
        }
        if (anyOne && ((alpha0 == 255) || (alpha1 == 255))) {
            // We still might use 8-vector: 255 is already an endpoint
            anyOne = 0;
        }
        // Order the endpoints so that alpha0 <= alpha1 exactly when the
        // explicit 0 / 255 codes are needed
        if ((anyZero | anyOne) ^ (alpha0 <= alpha1)) {
            int aux;
            aux = alpha0;
            alpha0 = alpha1;
            alpha1 = aux;
        }
        a_vect = (alpha0 <= alpha1) ? 5 : 7;

        // Compute interpolation vector (alpha0 != alpha1 in this branch,
        // so the divisor is never zero)
        iv[ACOMP] = (float)a_vect / (alpha1 - alpha0);
        b = -iv[ACOMP] * alpha0 + 0.5F;

        // Add in alphas, last texel first
        Q_MOV32(lo, 0);
        for (k = N_TEXELS - 1; k >= 0; k--) {
            int texel = -1;
            if (anyZero | anyOne) {
                if (input[k][ACOMP] == 0) {
                    texel = 6;
                }
                else if (input[k][ACOMP] == 255) {
                    texel = 7;
                }
            }
            // Interpolate alpha
            if (texel == -1) {
                float dot = input[k][ACOMP] * iv[ACOMP];
                texel = (int)(dot + b);
                // Keep the index inside the translation table
                if (texel < 0) {
                    texel = 0;
                }
                else if (texel > a_vect) {
                    texel = a_vect;
                }
                texel = dxtn_alpha_tlat[anyZero | anyOne][texel];
            }
            // Add in texel
            Q_SHL(lo, 3);
            Q_OR32(lo, texel);
        }
        Q_SHL(lo, 16);
        Q_OR32(lo, alpha0 | (alpha1 << 8));
        ((qword *)cc)[0] = lo;
    }

    color0 = COLOR565(input[minCol]);
    color1 = COLOR565(input[maxCol]);

#ifdef RADEON /* H.Morii - Workaround for ATI Radeon */
    if (color0 < color1) {
        int aux;
        aux = color0;
        color0 = color1;
        color1 = aux;
        aux = minCol;
        minCol = maxCol;
        maxCol = aux;
    }
#endif

    // The endpoints are widened before shifting so that a 16-bit color1
    // cannot overflow a signed int
    cc[2] = (dword)color0 | ((dword)color1 << 16);

    hihi = 0;
    if (color0 != color1) {
        MAKEIVEC(n_vect, n_comp, iv, b, input[minCol], input[maxCol]);

        // Add in texels, last texel first so that texel 0 ends up in the
        // lowest two bits
        for (k = N_TEXELS - 1; k >= 0; k--) {
            int texel;
            // Interpolate color
            CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]);
            texel = dxtn_color_tlat[0][texel];
            // Add in texel
            hihi <<= 2;
            hihi |= texel;
        }
    }
    cc[3] = hihi;
}
/*
 * Generate the public <dxtn>_encode function for one format.
 *
 * dxtn - format prefix; the generated function calls <dxtn>_quantize.
 * n    - number of dwords one encoded 4x4 block occupies.
 *
 * The generated function walks the image in 4x4 blocks. When width or
 * height is not a multiple of 4 the image is first expanded into a
 * temporary buffer by _mesa_upscale_teximage2d. srcRowStride and
 * destRowStride are in bytes. Returns 0 on success and -1 when the
 * temporary buffer cannot be allocated (nothing is written in that case).
 *
 * NOTE(review): the line that carried the return type is not present in
 * the text as received. `int` is used because the body returns 0; if the
 * prototype in dxtn.h carries an export or calling-convention macro,
 * restore it here.
 * NOTE(review): the allocation is scaled by sizeof(byte *), not
 * sizeof(byte). That looks like an unintended over-allocation, but the
 * buffer is filled by a helper that is not visible here, so the size is
 * left as it was - confirm before shrinking it.
 */
#define ENCODER(dxtn, n) \
int \
dxtn##_encode (int width, int height, int comps, \
               const void *source, int srcRowStride, \
               void *dest, int destRowStride) \
{ \
    int x, y; \
    const byte *data; \
    dword *encoded = (dword *)dest; \
    void *newSource = NULL; \
    /* Replicate image if width is not M4 or height is not M4 */ \
    if ((width & 3) | (height & 3)) { \
        int newWidth = (width + 3) & ~3; \
        int newHeight = (height + 3) & ~3; \
        /* size_t first, so the product is not computed in int */ \
        newSource = malloc((size_t)comps * newWidth * newHeight * sizeof(byte *)); \
        if (newSource == NULL) { \
            return -1; \
        } \
        _mesa_upscale_teximage2d(width, height, newWidth, newHeight, \
                                 comps, (const byte *)source, \
                                 srcRowStride, (byte *)newSource); \
        source = newSource; \
        width = newWidth; \
        height = newHeight; \
        srcRowStride = comps * newWidth; \
    } \
    data = (const byte *)source; \
    /* Bytes left over at the end of each block row, counted in dwords */ \
    destRowStride = (destRowStride - width * n) / 4; \
    for (y = 0; y < height; y += 4) { \
        unsigned int offs = 0 + (y + 0) * srcRowStride; \
        for (x = 0; x < width; x += 4) { \
            const byte *lines[4]; \
            lines[0] = &data[offs]; \
            lines[1] = lines[0] + srcRowStride; \
            lines[2] = lines[1] + srcRowStride; \
            lines[3] = lines[2] + srcRowStride; \
            offs += 4 * comps; \
            dxtn##_quantize(encoded, lines, comps); \
            /* 4x4 block */ \
            encoded += n; \
        } \
        encoded += destRowStride; \
    } \
    /* free(NULL) is a no-op, so no guard is needed */ \
    free(newSource); \
    return 0; \
}

ENCODER(dxt1_rgb, 2)
ENCODER(dxt1_rgba, 2)
ENCODER(dxt3_rgba, 4)
ENCODER(dxt5_rgba, 4)
/*
 * DXTn decoder
 *
 * The decoder is based on the GL_EXT_texture_compression_s3tc
 * specification and serves as a concept for the encoder.
 */
// Lookup table for scaling 4-bit colors up to 8-bit (entry i is i * 17)
static const byte _rgb_scale_4[] = {
    0, 17, 34, 51, 68, 85, 102, 119,
    136, 153, 170, 187, 204, 221, 238, 255
};
// Lookup table for scaling 5-bit colors up to 8-bit (0 maps to 0, 31 to 255)
static const byte _rgb_scale_5[] = {
    0, 8, 16, 25, 33, 41, 49, 58,
    66, 74, 82, 90, 99, 107, 115, 123,
    132, 140, 148, 156, 165, 173, 181, 189,
    197, 206, 214, 222, 230, 239, 247, 255
};
// Lookup table for scaling 6-bit colors up to 8-bit (0 maps to 0, 63 to 255)
static const byte _rgb_scale_6[] = {
    0, 4, 8, 12, 16, 20, 24, 28,
    32, 36, 40, 45, 49, 53, 57, 61,
    65, 69, 73, 77, 81, 85, 89, 93,
    97, 101, 105, 109, 113, 117, 121, 125,
    130, 134, 138, 142, 146, 150, 154, 158,
    162, 166, 170, 174, 178, 182, 186, 190,
    194, 198, 202, 206, 210, 215, 219, 223,
    227, 231, 235, 239, 243, 247, 251, 255
};
// Treat cc as an array of dwords, pick the dword that contains bit `which`
// and shift it right so that bit lands in bit 0. The caller masks off the
// bits it wants. NOTE(review): cc is reinterpreted as dword *, so it is
// presumably expected to be suitably aligned - confirm for every call site.
#define CC_SEL(cc, which) (((dword *)(cc))[(which) / 32] >> ((which) & 31))
// Expand the low 4, 5 or 6 bits of c to an 8-bit value through the
// corresponding scaling table
#define UP4(c) _rgb_scale_4[(c) & 15]
#define UP5(c) _rgb_scale_5[(c) & 31]
#define UP6(c) _rgb_scale_6[(c) & 63]
// Clear the four bytes starting at v. memset is used so that the store does
// not depend on v being aligned for, or accessible as, a wider integer type.
#define ZERO_4UBV(v) ((void)memset((v), 0, 4))
/*
 * Decode one texel of an opaque DXT1 texture.
 *
 * texture - start of the compressed data (8 bytes per 4x4 block).
 * stride  - divided by 4, rounded up, to get the number of blocks per block
 *           row; presumably the texture width in texels - TODO confirm
 *           against the callers.
 * i, j    - texel column and row.
 * rgba    - receives the four decoded bytes; ACOMP is always set to 255.
 *
 * Code 0 selects color0 and code 1 selects color1. When color0 > color1,
 * codes 2 and 3 are the 2:1 and 1:2 blends; otherwise code 2 is the
 * average and code 3 is black.
 *
 * NOTE(review): the return-type line and the statement of the final else
 * branch are not present in the text as received. `void` is used because
 * nothing is returned, and ZERO_4UBV(rgba) because the S3TC specification
 * decodes that case to black - confirm against dxtn.h and the upstream
 * source, including any calling-convention macro on the prototype.
 */
void
dxt1_rgb_decode_1(const void *texture, int stride,
                  int i, int j, byte *rgba)
{
    // Locate the 8-byte block that contains texel (i, j)
    const byte *src = (const byte *)texture
        + ((j / 4) * ((stride + 3) / 4) + i / 4) * 8;
    // One byte of codes per texel row, two bits per texel
    const int code = (src[4 + (j & 3)] >> ((i & 3) * 2)) & 0x3;
    if (code == 0) {
        rgba[RCOMP] = UP5(CC_SEL(src, 11));
        rgba[GCOMP] = UP6(CC_SEL(src, 5));
        rgba[BCOMP] = UP5(CC_SEL(src, 0));
    }
    else if (code == 1) {
        rgba[RCOMP] = UP5(CC_SEL(src, 27));
        rgba[GCOMP] = UP6(CC_SEL(src, 21));
        rgba[BCOMP] = UP5(CC_SEL(src, 16));
    }
    else {
        const word col0 = src[0] | (src[1] << 8);
        const word col1 = src[2] | (src[3] << 8);
        if (col0 > col1) {
            if (code == 2) {
                rgba[RCOMP] = (UP5(col0 >> 11) * 2 + UP5(col1 >> 11)) / 3;
                rgba[GCOMP] = (UP6(col0 >> 5) * 2 + UP6(col1 >> 5)) / 3;
                rgba[BCOMP] = (UP5(col0) * 2 + UP5(col1)) / 3;
            }
            else {
                rgba[RCOMP] = (UP5(col0 >> 11) + 2 * UP5(col1 >> 11)) / 3;
                rgba[GCOMP] = (UP6(col0 >> 5) + 2 * UP6(col1 >> 5)) / 3;
                rgba[BCOMP] = (UP5(col0) + 2 * UP5(col1)) / 3;
            }
        }
        else {
            if (code == 2) {
                rgba[RCOMP] = (UP5(col0 >> 11) + UP5(col1 >> 11)) / 2;
                rgba[GCOMP] = (UP6(col0 >> 5) + UP6(col1 >> 5)) / 2;
                rgba[BCOMP] = (UP5(col0) + UP5(col1)) / 2;
            }
            else {
                // Code 3 with col0 <= col1: black
                ZERO_4UBV(rgba);
            }
        }
    }
    rgba[ACOMP] = 255;
}
/*
 * Decode one texel of a DXT1 texture with 1-bit alpha.
 *
 * texture - start of the compressed data (8 bytes per 4x4 block).
 * stride  - divided by 4, rounded up, to get the number of blocks per block
 *           row; presumably the texture width in texels - TODO confirm
 *           against the callers.
 * i, j    - texel column and row.
 * rgba    - receives the four decoded bytes.
 *
 * Same as dxt1_rgb_decode_1, except alpha=0 if col0<=col1 and code=3.
 *
 * NOTE(review): the return-type line and the statement of the final else
 * branch are not present in the text as received. `void` is used because
 * nothing is returned, and ZERO_4UBV(rgba) because it yields the alpha 0
 * described above - confirm against dxtn.h and the upstream source,
 * including any calling-convention macro on the prototype.
 */
void
dxt1_rgba_decode_1(const void *texture, int stride,
                   int i, int j, byte *rgba)
{
    // Locate the 8-byte block that contains texel (i, j)
    const byte *src = (const byte *)texture
        + ((j / 4) * ((stride + 3) / 4) + i / 4) * 8;
    // One byte of codes per texel row, two bits per texel
    const int code = (src[4 + (j & 3)] >> ((i & 3) * 2)) & 0x3;
    if (code == 0) {
        rgba[RCOMP] = UP5(CC_SEL(src, 11));
        rgba[GCOMP] = UP6(CC_SEL(src, 5));
        rgba[BCOMP] = UP5(CC_SEL(src, 0));
        rgba[ACOMP] = 255;
    }
    else if (code == 1) {
        rgba[RCOMP] = UP5(CC_SEL(src, 27));
        rgba[GCOMP] = UP6(CC_SEL(src, 21));
        rgba[BCOMP] = UP5(CC_SEL(src, 16));
        rgba[ACOMP] = 255;
    }
    else {
        const word col0 = src[0] | (src[1] << 8);
        const word col1 = src[2] | (src[3] << 8);
        if (col0 > col1) {
            if (code == 2) {
                rgba[RCOMP] = (UP5(col0 >> 11) * 2 + UP5(col1 >> 11)) / 3;
                rgba[GCOMP] = (UP6(col0 >> 5) * 2 + UP6(col1 >> 5)) / 3;
                rgba[BCOMP] = (UP5(col0) * 2 + UP5(col1)) / 3;
            }
            else {
                rgba[RCOMP] = (UP5(col0 >> 11) + 2 * UP5(col1 >> 11)) / 3;
                rgba[GCOMP] = (UP6(col0 >> 5) + 2 * UP6(col1 >> 5)) / 3;
                rgba[BCOMP] = (UP5(col0) + 2 * UP5(col1)) / 3;
            }
            rgba[ACOMP] = 255;
        }
        else {
            if (code == 2) {
                rgba[RCOMP] = (UP5(col0 >> 11) + UP5(col1 >> 11)) / 2;
                rgba[GCOMP] = (UP6(col0 >> 5) + UP6(col1 >> 5)) / 2;
                rgba[BCOMP] = (UP5(col0) + UP5(col1)) / 2;
                rgba[ACOMP] = 255;
            }
            else {
                // Code 3 with col0 <= col1: transparent black
                ZERO_4UBV(rgba);
            }
        }
    }
}
/*
 * Decode one texel of a DXT3 texture.
 *
 * texture - start of the compressed data (16 bytes per 4x4 block: 8 bytes
 *           of 4-bit alphas followed by 8 bytes of color data).
 * stride  - divided by 4, rounded up, to get the number of blocks per block
 *           row; presumably the texture width in texels - TODO confirm
 *           against the callers.
 * i, j    - texel column and row.
 * rgba    - receives the four decoded bytes.
 *
 * The color part is always decoded in four-color mode, regardless of how
 * the two endpoints compare.
 *
 * NOTE(review): the return-type line is not present in the text as
 * received. `void` is used because nothing is returned - confirm against
 * dxtn.h, including any calling-convention macro on the prototype.
 */
void
dxt3_rgba_decode_1(const void *texture, int stride,
                   int i, int j, byte *rgba)
{
    // Locate the 16-byte block that contains texel (i, j)
    const byte *src = (const byte *)texture
        + ((j / 4) * ((stride + 3) / 4) + i / 4) * 16;
    // One byte of color codes per texel row, two bits per texel
    const int code = (src[12 + (j & 3)] >> ((i & 3) * 2)) & 0x3;
    const dword *cc = (const dword *)(src + 8);
    if (code == 0) {
        rgba[RCOMP] = UP5(CC_SEL(cc, 11));
        rgba[GCOMP] = UP6(CC_SEL(cc, 5));
        rgba[BCOMP] = UP5(CC_SEL(cc, 0));
    }
    else if (code == 1) {
        rgba[RCOMP] = UP5(CC_SEL(cc, 27));
        rgba[GCOMP] = UP6(CC_SEL(cc, 21));
        rgba[BCOMP] = UP5(CC_SEL(cc, 16));
    }
    else if (code == 2) {
        // (col0 * (4 - code) + col1 * (code - 1)) / 3
        rgba[RCOMP] = (UP5(CC_SEL(cc, 11)) * 2 + UP5(CC_SEL(cc, 27))) / 3;
        rgba[GCOMP] = (UP6(CC_SEL(cc, 5)) * 2 + UP6(CC_SEL(cc, 21))) / 3;
        rgba[BCOMP] = (UP5(CC_SEL(cc, 0)) * 2 + UP5(CC_SEL(cc, 16))) / 3;
    }
    else {
        rgba[RCOMP] = (UP5(CC_SEL(cc, 11)) + 2 * UP5(CC_SEL(cc, 27))) / 3;
        rgba[GCOMP] = (UP6(CC_SEL(cc, 5)) + 2 * UP6(CC_SEL(cc, 21))) / 3;
        rgba[BCOMP] = (UP5(CC_SEL(cc, 0)) + 2 * UP5(CC_SEL(cc, 16))) / 3;
    }
    // Two alphas per byte: even columns in the low nibble, odd in the high
    rgba[ACOMP] = UP4(src[((j & 3) * 4 + (i & 3)) / 2] >> ((i & 1) * 4));
}
/*
 * Decode one texel of a DXT5 texture.
 *
 * texture - start of the compressed data (16 bytes per 4x4 block: alpha0,
 *           alpha1, 48 bits of 3-bit alpha codes, then 8 bytes of color
 *           data).
 * stride  - divided by 4, rounded up, to get the number of blocks per block
 *           row; presumably the texture width in texels - TODO confirm
 *           against the callers.
 * i, j    - texel column and row.
 * rgba    - receives the four decoded bytes.
 *
 * The color part is always decoded in four-color mode. Alpha codes 0 and 1
 * select alpha0 and alpha1. When alpha0 > alpha1 the remaining six codes
 * interpolate between them; otherwise codes 2..5 interpolate, code 6 is 0
 * and code 7 is 255.
 *
 * NOTE(review): the return-type line is not present in the text as
 * received. `void` is used because nothing is returned - confirm against
 * dxtn.h, including any calling-convention macro on the prototype.
 */
void
dxt5_rgba_decode_1(const void *texture, int stride,
                   int i, int j, byte *rgba)
{
    // Locate the 16-byte block that contains texel (i, j)
    const byte *src = (const byte *)texture
        + ((j / 4) * ((stride + 3) / 4) + i / 4) * 16;
    // One byte of color codes per texel row, two bits per texel
    const int code = (src[12 + (j & 3)] >> ((i & 3) * 2)) & 0x3;
    const dword *cc = (const dword *)(src + 8);
    const byte alpha0 = src[0];
    const byte alpha1 = src[1];
    // Bit position of this texel's 3-bit alpha code within the block
    const int alphaShift = (((j & 3) * 4) + (i & 3)) * 3 + 16;
    // The code that starts at bit 31 is the only one that straddles the
    // two dwords, so it is read from a window that begins 16 bits later
    const int acode = ((alphaShift == 31)
        ? CC_SEL(src + 2, alphaShift - 16)
        : CC_SEL(src, alphaShift)) & 0x7;
    if (code == 0) {
        rgba[RCOMP] = UP5(CC_SEL(cc, 11));
        rgba[GCOMP] = UP6(CC_SEL(cc, 5));
        rgba[BCOMP] = UP5(CC_SEL(cc, 0));
    }
    else if (code == 1) {
        rgba[RCOMP] = UP5(CC_SEL(cc, 27));
        rgba[GCOMP] = UP6(CC_SEL(cc, 21));
        rgba[BCOMP] = UP5(CC_SEL(cc, 16));
    }
    else if (code == 2) {
        // (col0 * (4 - code) + col1 * (code - 1)) / 3
        rgba[RCOMP] = (UP5(CC_SEL(cc, 11)) * 2 + UP5(CC_SEL(cc, 27))) / 3;
        rgba[GCOMP] = (UP6(CC_SEL(cc, 5)) * 2 + UP6(CC_SEL(cc, 21))) / 3;
        rgba[BCOMP] = (UP5(CC_SEL(cc, 0)) * 2 + UP5(CC_SEL(cc, 16))) / 3;
    }
    else {
        rgba[RCOMP] = (UP5(CC_SEL(cc, 11)) + 2 * UP5(CC_SEL(cc, 27))) / 3;
        rgba[GCOMP] = (UP6(CC_SEL(cc, 5)) + 2 * UP6(CC_SEL(cc, 21))) / 3;
        rgba[BCOMP] = (UP5(CC_SEL(cc, 0)) + 2 * UP5(CC_SEL(cc, 16))) / 3;
    }
    if (acode == 0) {
        rgba[ACOMP] = alpha0;
    }
    else if (acode == 1) {
        rgba[ACOMP] = alpha1;
    }
    else if (alpha0 > alpha1) {
        rgba[ACOMP] = (uint8_t)(((8 - acode) * alpha0 + (acode - 1) * alpha1) / 7);
    }
    else if (acode == 6) {
        rgba[ACOMP] = 0;
    }
    else if (acode == 7) {
        rgba[ACOMP] = 255;
    }
    else {
        rgba[ACOMP] = (uint8_t)(((6 - acode) * alpha0 + (acode - 1) * alpha1) / 5);
    }
}