// Project64 - A Nintendo 64 emulator // https://www.pj64-emu.com/ // Copyright(C) 2001-2021 Project64 // Copyright(C) 2007 Hiroshi Morii // Copyright(C) 2004 Daniel Borca // GNU/GPLv2 licensed: https://gnu.org/licenses/gpl-2.0.html #include #include #include "types.h" #include "internal.h" #include "fxt1.h" /* FXT1 encoder The encoder was built by reversing the decoder, and is vaguely based on Texus2 by 3DFX. Note that this code is merely a proof of concept, since it is highly unoptimized; Moreover, it is sub-optimal due to initial conditions passed to Lloyd's algorithm (the interpolation modes are even worse). */ #define MAX_COMP 4 /* ever needed maximum number of components in texel */ #define MAX_VECT 4 /* ever needed maximum number of base vectors to find */ #define N_TEXELS 32 /* number of texels in a block (always 32) */ #define LL_N_REP 50 /* number of iterations in lloyd's vq */ #define LL_RMS_D 10 /* fault tolerance (maximum delta) */ #define LL_RMS_E 255 /* fault tolerance (maximum error) */ #define ALPHA_TS 2 /* alpha threshold: (255 - ALPHA_TS) deemed opaque */ #define ISTBLACK(v) (*((dword *)(v)) == 0) #define COPY_4UBV(DST, SRC) *((dword *)(DST)) = *((dword *)(SRC)) static int fxt1_bestcol(float vec[][MAX_COMP], int nv, byte input[MAX_COMP], int nc) { int i, j, best = -1; float err = 1e9; // Big enough for (j = 0; j < nv; j++) { float e = 0.0F; for (i = 0; i < nc; i++) { e += (vec[j][i] - input[i]) * (vec[j][i] - input[i]); } if (e < err) { err = e; best = j; } } return best; } static int fxt1_worst(float vec[MAX_COMP], byte input[N_TEXELS][MAX_COMP], int nc, int n) { int i, k, worst = -1; float err = -1.0F; // Small enough for (k = 0; k < n; k++) { float e = 0.0F; for (i = 0; i < nc; i++) { e += (vec[i] - input[k][i]) * (vec[i] - input[k][i]); } if (e > err) { err = e; worst = k; } } return worst; } static int fxt1_variance(double variance[MAX_COMP], byte input[N_TEXELS][MAX_COMP], int nc, int n) { int i, k, best = 0; dword sx, sx2; double var, maxvar = -1; // Small enough double teenth = 1.0 / n; for (i = 0; i < nc; i++) { sx = sx2 = 0; for (k = 0; k < n; k++) { int t = input[k][i]; sx += t; sx2 += t * t; } var = sx2 * teenth - sx * sx * teenth * teenth; if (maxvar < var) { maxvar = var; best = i; } if (variance) { variance[i] = var; } } return best; } static int fxt1_choose(float vec[][MAX_COMP], int nv, byte input[N_TEXELS][MAX_COMP], int nc, int n) { #if 0 // Choose colors from a grid int i, j; for (j = 0; j < nv; j++) { int m = j * (n - 1) / (nv - 1); for (i = 0; i < nc; i++) { vec[j][i] = input[m][i]; } } #else /* Our solution here is to find the darkest and brightest colors in the 8x4 tile and use those as the two representative colors. There are probably better algorithms to use (histogram-based). */ int i, j, k; #ifndef YUV int minSum = 2000; // Big enough #else int minSum = 2000000; #endif int maxSum = -1; // Small enough int minCol = 0; // phoudoin: Silent compiler! int maxCol = 0; // phoudoin: Silent compiler! struct { int flag; dword key; int freq; int idx; } hist[N_TEXELS]; int lenh = 0; memset(hist, 0, sizeof(hist)); for (k = 0; k < n; k++) { int l; dword key = 0; int sum = 0; for (i = 0; i < nc; i++) { key <<= 8; key |= input[k][i]; #ifndef YUV sum += input[k][i]; #else /* RGB to YUV conversion according to CCIR 601 specs * Y = 0.299R+0.587G+0.114B * U = 0.713(R - Y) = 0.500R-0.419G-0.081B * V = 0.564(B - Y) = -0.169R-0.331G+0.500B */ sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP]; #endif } for (l = 0; l < n; l++) { if (!hist[l].flag) { // Allocate new slot hist[l].flag = !0; hist[l].key = key; hist[l].freq = 1; hist[l].idx = k; lenh = l + 1; break; } else if (hist[l].key == key) { hist[l].freq++; break; } } if (minSum > sum) { minSum = sum; minCol = k; } if (maxSum < sum) { maxSum = sum; maxCol = k; } } if (lenh <= nv) { for (j = 0; j < lenh; j++) { for (i = 0; i < nc; i++) { vec[j][i] = (float)input[hist[j].idx][i]; } } for (; j < nv; j++) { for (i = 0; i < nc; i++) { vec[j][i] = vec[0][i]; } } return 0; } for (j = 0; j < nv; j++) { for (i = 0; i < nc; i++) { vec[j][i] = ((nv - 1 - j) * input[minCol][i] + j * input[maxCol][i] + (nv - 1) / 2) / (float)(nv - 1); } } #endif return !0; } static int fxt1_lloyd(float vec[][MAX_COMP], int nv, byte input[N_TEXELS][MAX_COMP], int nc, int n) { /* Use the generalized Lloyd's algorithm for VQ: find 4 color vectors. For each sample color, sort to nearest vector. Replace each vector with the centroid of it's matching colors. Repeat until RMS doesn't improve. If a color vector has no samples, or becomes the same as another vector, replace it with the color which is farthest from a sample. vec[][MAX_COMP] Initial vectors and resulting colors nv Number of resulting colors required input[N_TEXELS][MAX_COMP] Input texels nc Number of components in input / vec n Number of input samples */ int sum[MAX_VECT][MAX_COMP]; // Used to accumulate closest texels int cnt[MAX_VECT]; // How many times a certain vector was chosen float error, lasterror = 1e9; int i, j, k, rep; // The quantizer for (rep = 0; rep < LL_N_REP; rep++) { // Reset sums and counters for (j = 0; j < nv; j++) { for (i = 0; i < nc; i++) { sum[j][i] = 0; } cnt[j] = 0; } error = 0; // Scan whole block for (k = 0; k < n; k++) { #if 1 int best = -1; float err = 1e9; // Big enough // Determine best vector for (j = 0; j < nv; j++) { float e = (vec[j][0] - input[k][0]) * (vec[j][0] - input[k][0]) + (vec[j][1] - input[k][1]) * (vec[j][1] - input[k][1]) + (vec[j][2] - input[k][2]) * (vec[j][2] - input[k][2]); if (nc == 4) { e += (vec[j][3] - input[k][3]) * (vec[j][3] - input[k][3]); } if (e < err) { err = e; best = j; } } #else int best = fxt1_bestcol(vec, nv, input[k], nc, &err); #endif // Add in closest color for (i = 0; i < nc; i++) { sum[best][i] += input[k][i]; } // Mark this vector as used cnt[best]++; // Accumulate error error += err; } // Check RMS if ((error < LL_RMS_E) || ((error < lasterror) && ((lasterror - error) < LL_RMS_D))) { return !0; // Good match } lasterror = error; // Move each vector to the barycenter of its closest colors for (j = 0; j < nv; j++) { if (cnt[j]) { float div = 1.0F / cnt[j]; for (i = 0; i < nc; i++) { vec[j][i] = div * sum[j][i]; } } else { // This vector has no samples or is identical with a previous vector int worst = fxt1_worst(vec[j], input, nc, n); for (i = 0; i < nc; i++) { vec[j][i] = input[worst][i]; } } } } return 0; // Could not converge fast enough } static void fxt1_quantize_CHROMA(dword *cc, byte input[N_TEXELS][MAX_COMP]) { const int n_vect = 4; // 4 base vectors to find const int n_comp = 3; // 3 components: R, G, B float vec[MAX_VECT][MAX_COMP]; int i, j, k; qword hi; // High quadword dword lohi, lolo; // Low quadword: hi DWORD, lo DWORD if (fxt1_choose(vec, n_vect, input, n_comp, N_TEXELS) != 0) { fxt1_lloyd(vec, n_vect, input, n_comp, N_TEXELS); } Q_MOV32(hi, 4); // cc-chroma = "010" + unused bit for (j = n_vect - 1; j >= 0; j--) { for (i = 0; i < n_comp; i++) { // Add in colors Q_SHL(hi, 5); Q_OR32(hi, (dword)(vec[j][i] / 8.0F)); } } ((qword *)cc)[1] = hi; lohi = lolo = 0; // Right microtile for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) { lohi <<= 2; lohi |= fxt1_bestcol(vec, n_vect, input[k], n_comp); } // Left microtile for (; k >= 0; k--) { lolo <<= 2; lolo |= fxt1_bestcol(vec, n_vect, input[k], n_comp); } cc[1] = lohi; cc[0] = lolo; } static void fxt1_quantize_ALPHA0(dword *cc, byte input[N_TEXELS][MAX_COMP], byte reord[N_TEXELS][MAX_COMP], int n) { const int n_vect = 3; // 3 base vectors to find const int n_comp = 4; // 4 components: R, G, B, A float vec[MAX_VECT][MAX_COMP]; int i, j, k; qword hi; // High quadword dword lohi, lolo; // Low quadword: hi DWORD, lo DWORD // The last vector indicates zero for (i = 0; i < n_comp; i++) { vec[n_vect][i] = 0; } // The first N texels in reord are guaranteed to be non-zero if (fxt1_choose(vec, n_vect, reord, n_comp, n) != 0) { fxt1_lloyd(vec, n_vect, reord, n_comp, n); } Q_MOV32(hi, 6); // Alpha = "011" + lerp = 0 for (j = n_vect - 1; j >= 0; j--) { // Add in alphas Q_SHL(hi, 5); Q_OR32(hi, (dword)(vec[j][ACOMP] / 8.0F)); } for (j = n_vect - 1; j >= 0; j--) { for (i = 0; i < n_comp - 1; i++) { // Add in colors Q_SHL(hi, 5); Q_OR32(hi, (dword)(vec[j][i] / 8.0F)); } } ((qword *)cc)[1] = hi; lohi = lolo = 0; // Right microtile for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) { lohi <<= 2; lohi |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp); } // Left microtile for (; k >= 0; k--) { lolo <<= 2; lolo |= fxt1_bestcol(vec, n_vect + 1, input[k], n_comp); } cc[1] = lohi; cc[0] = lolo; } static void fxt1_quantize_ALPHA1(dword *cc, byte input[N_TEXELS][MAX_COMP]) { const int n_vect = 3; // Highest vector number in each microtile const int n_comp = 4; // 4 components: R, G, B, A float vec[1 + 1 + 1][MAX_COMP]; // 1.5 extrema for each sub-block float b, iv[MAX_COMP]; // Interpolation vector int i, j, k; qword hi; // High quadword dword lohi, lolo; // Low quadword: hi DWORD, lo DWORD int minSum; int maxSum; int minColL = 0, maxColL = 0; int minColR = 0, maxColR = 0; int sumL = 0, sumR = 0; /* TODO: Our solution here is to find the darkest and brightest colors in the 4x4 tile and use those as the two representative colors. There are probably better algorithms to use (histogram-based). */ #ifndef YUV minSum = 2000; // Big enough #else minSum = 2000000; #endif maxSum = -1; // Small enough for (k = 0; k < N_TEXELS / 2; k++) { int sum = 0; #ifndef YUV for (i = 0; i < n_comp; i++) { sum += input[k][i]; } #else sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP]; #endif if (minSum > sum) { minSum = sum; minColL = k; } if (maxSum < sum) { maxSum = sum; maxColL = k; } sumL += sum; } #ifndef YUV minSum = 2000; // Big enough #else minSum = 2000000; #endif maxSum = -1; // Small enough for (; k < N_TEXELS; k++) { int sum = 0; #ifndef YUV for (i = 0; i < n_comp; i++) { sum += input[k][i]; } #else sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP]; #endif if (minSum > sum) { minSum = sum; minColR = k; } if (maxSum < sum) { maxSum = sum; maxColR = k; } sumR += sum; } // Choose the common vector { int j1, j2; int v1 = 0, v2 = 0; float err = 1e9; // Big enough float tv[2 * 2][MAX_COMP]; // 2 extrema for each sub-block for (i = 0; i < n_comp; i++) { tv[0][i] = input[minColL][i]; tv[1][i] = input[maxColL][i]; tv[2][i] = input[minColR][i]; tv[3][i] = input[maxColR][i]; } for (j1 = 0; j1 < 2; j1++) { for (j2 = 2; j2 < 4; j2++) { float e = 0.0F; for (i = 0; i < n_comp; i++) { e += (tv[j1][i] - tv[j2][i]) * (tv[j1][i] - tv[j2][i]); } if (e < err) { err = e; v1 = j1; v2 = j2; } } } for (i = 0; i < n_comp; i++) { vec[0][i] = tv[1 - v1][i]; vec[1][i] = (tv[v1][i] * sumL + tv[v2][i] * sumR) / (sumL + sumR); vec[2][i] = tv[5 - v2][i]; } } // Left microtile cc[0] = 0; if (minColL != maxColL) { // Compute interpolation vector MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]); // Add in texels lolo = 0; for (k = N_TEXELS / 2 - 1; k >= 0; k--) { int texel; // Interpolate color CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]); // Add in texel lolo <<= 2; lolo |= texel; } cc[0] = lolo; } // Right microtile cc[1] = 0; if (minColR != maxColR) { // Compute interpolation vector MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[1]); // Add in texels lohi = 0; for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) { int texel; // Interpolate color CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]); // Add in texel lohi <<= 2; lohi |= texel; } cc[1] = lohi; } Q_MOV32(hi, 7); // Alpha = "011" + lerp = 1 for (j = n_vect - 1; j >= 0; j--) { // Add in alphas Q_SHL(hi, 5); Q_OR32(hi, (dword)(vec[j][ACOMP] / 8.0F)); } for (j = n_vect - 1; j >= 0; j--) { for (i = 0; i < n_comp - 1; i++) { // Add in colors Q_SHL(hi, 5); Q_OR32(hi, (dword)(vec[j][i] / 8.0F)); } } ((qword *)cc)[1] = hi; } static void fxt1_quantize_HI(dword *cc, byte input[N_TEXELS][MAX_COMP], byte reord[N_TEXELS][MAX_COMP], int n) { const int n_vect = 6; // Highest vector number const int n_comp = 3; // 3 components: R, G, B float b = 0.0F; // phoudoin: Silent compiler! float iv[MAX_COMP]; // Interpolation vector int i, k; dword hihi; // High quadword: hi DWORD #ifndef YUV int minSum = 2000; // Big enough #else int minSum = 2000000; #endif int maxSum = -1; // Small enough int minCol = 0; // phoudoin: Silent compiler! int maxCol = 0; // phoudoin: Silent compiler! /* Our solution here is to find the darkest and brightest colors in the 8x4 tile and use those as the two representative colors. There are probably better algorithms to use (histogram-based). */ for (k = 0; k < n; k++) { int sum = 0; #ifndef YUV for (i = 0; i < n_comp; i++) { sum += reord[k][i]; } #else sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP]; #endif if (minSum > sum) { minSum = sum; minCol = k; } if (maxSum < sum) { maxSum = sum; maxCol = k; } } hihi = 0; // cc-hi = "00" for (i = 0; i < n_comp; i++) { // Add in colors hihi <<= 5; hihi |= reord[maxCol][i] >> 3; } for (i = 0; i < n_comp; i++) { // Add in colors hihi <<= 5; hihi |= reord[minCol][i] >> 3; } cc[3] = hihi; cc[0] = cc[1] = cc[2] = 0; // Compute interpolation vector if (minCol != maxCol) { MAKEIVEC(n_vect, n_comp, iv, b, reord[minCol], reord[maxCol]); } // Add in texels for (k = N_TEXELS - 1; k >= 0; k--) { int t = k * 3; dword *kk = (dword *)((byte *)cc + t / 8); int texel = n_vect + 1; // Transparent black if (!ISTBLACK(input[k])) { if (minCol != maxCol) { // Interpolate color CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]); // Add in texel kk[0] |= texel << (t & 7); } } else { // Add in texel kk[0] |= texel << (t & 7); } } } static void fxt1_quantize_MIXED1(dword *cc, byte input[N_TEXELS][MAX_COMP]) { const int n_vect = 2; // Highest vector number in each microtile const int n_comp = 3; // 3 components: R, G, B byte vec[2 * 2][MAX_COMP]; // 2 extrema for each sub-block float b, iv[MAX_COMP]; // Interpolation vector int i, j, k; qword hi; // High quadword dword lohi, lolo; // Low quadword: hi DWORD, lo DWORD int minSum; int maxSum; int minColL = 0, maxColL = -1; int minColR = 0, maxColR = -1; /* Our solution here is to find the darkest and brightest colors in the 4x4 tile and use those as the two representative colors. There are probably better algorithms to use (histogram-based). */ #ifndef YUV minSum = 2000; // Big enough #else minSum = 2000000; #endif maxSum = -1; // Small enough for (k = 0; k < N_TEXELS / 2; k++) { if (!ISTBLACK(input[k])) { int sum = 0; #ifndef YUV for (i = 0; i < n_comp; i++) { sum += input[k][i]; } #else sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP]; #endif if (minSum > sum) { minSum = sum; minColL = k; } if (maxSum < sum) { maxSum = sum; maxColL = k; } } } #ifndef YUV minSum = 2000; // Big enough #else minSum = 2000000; #endif maxSum = -1; // Small enough for (; k < N_TEXELS; k++) { if (!ISTBLACK(input[k])) { int sum = 0; #ifndef YUV for (i = 0; i < n_comp; i++) { sum += input[k][i]; } #else sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP]; #endif if (minSum > sum) { minSum = sum; minColR = k; } if (maxSum < sum) { maxSum = sum; maxColR = k; } } } // Left microtile if (maxColL == -1) { // All transparent black cc[0] = 0xFFFFFFFFUL; for (i = 0; i < n_comp; i++) { vec[0][i] = 0; vec[1][i] = 0; } } else { cc[0] = 0; for (i = 0; i < n_comp; i++) { vec[0][i] = input[minColL][i]; vec[1][i] = input[maxColL][i]; } if (minColL != maxColL) { // Compute interpolation vector MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]); // Add in texels lolo = 0; for (k = N_TEXELS / 2 - 1; k >= 0; k--) { int texel = n_vect + 1; // Transparent black if (!ISTBLACK(input[k])) { // Interpolate color CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]); } // Add in texel lolo <<= 2; lolo |= texel; } cc[0] = lolo; } } // Right microtile if (maxColR == -1) { // All transparent black cc[1] = 0xFFFFFFFFUL; for (i = 0; i < n_comp; i++) { vec[2][i] = 0; vec[3][i] = 0; } } else { cc[1] = 0; for (i = 0; i < n_comp; i++) { vec[2][i] = input[minColR][i]; vec[3][i] = input[maxColR][i]; } if (minColR != maxColR) { // Compute interpolation vector MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]); // Add in texels lohi = 0; for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) { int texel = n_vect + 1; // Transparent black if (!ISTBLACK(input[k])) { // Interpolate color CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]); } // Add in texel lohi <<= 2; lohi |= texel; } cc[1] = lohi; } } Q_MOV32(hi, 9 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); // Chroma = "1" for (j = 2 * 2 - 1; j >= 0; j--) { for (i = 0; i < n_comp; i++) { // Add in colors Q_SHL(hi, 5); Q_OR32(hi, vec[j][i] >> 3); } } ((qword *)cc)[1] = hi; } static void fxt1_quantize_MIXED0(dword *cc, byte input[N_TEXELS][MAX_COMP]) { const int n_vect = 3; // Highest vector number in each microtile const int n_comp = 3; // 3 components: R, G, B byte vec[2 * 2][MAX_COMP]; // 2 extrema for each sub-block float b, iv[MAX_COMP]; // Interpolation vector int i, j, k; qword hi; // High quadword dword lohi, lolo; // Low quadword: hi DWORD, lo DWORD int minColL = 0, maxColL = 0; int minColR = 0, maxColR = 0; #if 0 int minSum; int maxSum; /* Our solution here is to find the darkest and brightest colors in the 4x4 tile and use those as the two representative colors. There are probably better algorithms to use (histogram-based). */ #ifndef YUV minSum = 2000; // Big enough #else minSum = 2000000; #endif maxSum = -1; // Small enough for (k = 0; k < N_TEXELS / 2; k++) { int sum = 0; #ifndef YUV for (i = 0; i < n_comp; i++) { sum += input[k][i]; } #else sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP]; #endif if (minSum > sum) { minSum = sum; minColL = k; } if (maxSum < sum) { maxSum = sum; maxColL = k; } } minSum = 2000; // Big enough maxSum = -1; // Small enough for (; k < N_TEXELS; k++) { int sum = 0; #ifndef YUV for (i = 0; i < n_comp; i++) { sum += input[k][i]; } #else sum = 299 * input[k][RCOMP] + 587 * input[k][GCOMP] + 114 * input[k][BCOMP]; #endif if (minSum > sum) { minSum = sum; minColR = k; } if (maxSum < sum) { maxSum = sum; maxColR = k; } } #else int minVal; int maxVal; int maxVarL = fxt1_variance(NULL, input, n_comp, N_TEXELS / 2); int maxVarR = fxt1_variance(NULL, &input[N_TEXELS / 2], n_comp, N_TEXELS / 2); /* Scan the channel with max variance for lo and hi and use those as the two representative colors. */ minVal = 2000; // Big enough maxVal = -1; // Small enough for (k = 0; k < N_TEXELS / 2; k++) { int t = input[k][maxVarL]; if (minVal > t) { minVal = t; minColL = k; } if (maxVal < t) { maxVal = t; maxColL = k; } } minVal = 2000; // Big enough maxVal = -1; // Small enough for (; k < N_TEXELS; k++) { int t = input[k][maxVarR]; if (minVal > t) { minVal = t; minColR = k; } if (maxVal < t) { maxVal = t; maxColR = k; } } #endif // Left microtile cc[0] = 0; for (i = 0; i < n_comp; i++) { vec[0][i] = input[minColL][i]; vec[1][i] = input[maxColL][i]; } if (minColL != maxColL) { // Compute interpolation vector MAKEIVEC(n_vect, n_comp, iv, b, vec[0], vec[1]); // Add in texels lolo = 0; for (k = N_TEXELS / 2 - 1; k >= 0; k--) { int texel; // Interpolate color CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]); // Add in texel lolo <<= 2; lolo |= texel; } // Funky encoding for LSB of green if ((int)((lolo >> 1) & 1) != (((vec[1][GCOMP] ^ vec[0][GCOMP]) >> 2) & 1)) { for (i = 0; i < n_comp; i++) { vec[1][i] = input[minColL][i]; vec[0][i] = input[maxColL][i]; } lolo = ~lolo; } cc[0] = lolo; } // Right microtile cc[1] = 0; for (i = 0; i < n_comp; i++) { vec[2][i] = input[minColR][i]; vec[3][i] = input[maxColR][i]; } if (minColR != maxColR) { // Compute interpolation vector MAKEIVEC(n_vect, n_comp, iv, b, vec[2], vec[3]); // Add in texels lohi = 0; for (k = N_TEXELS - 1; k >= N_TEXELS / 2; k--) { int texel; // Interpolate color CALCCDOT(texel, n_vect, n_comp, iv, b, input[k]); // Add in texel lohi <<= 2; lohi |= texel; } // Funky encoding for LSB of green if ((int)((lohi >> 1) & 1) != (((vec[3][GCOMP] ^ vec[2][GCOMP]) >> 2) & 1)) { for (i = 0; i < n_comp; i++) { vec[3][i] = input[minColR][i]; vec[2][i] = input[maxColR][i]; } lohi = ~lohi; } cc[1] = lohi; } Q_MOV32(hi, 8 | (vec[3][GCOMP] & 4) | ((vec[1][GCOMP] >> 1) & 2)); // Chroma = "1" for (j = 2 * 2 - 1; j >= 0; j--) { for (i = 0; i < n_comp; i++) { // Add in colors Q_SHL(hi, 5); Q_OR32(hi, vec[j][i] >> 3); } } ((qword *)cc)[1] = hi; } static void fxt1_quantize(dword *cc, const byte *lines[], int comps) { int trualpha; byte reord[N_TEXELS][MAX_COMP]; byte input[N_TEXELS][MAX_COMP]; #ifndef ARGB int i; #endif int k, l; if (comps == 3) { // Make the whole block opaque memset(input, -1, sizeof(input)); } // 8 texels each line #ifndef ARGB for (l = 0; l < 4; l++) { for (k = 0; k < 4; k++) { for (i = 0; i < comps; i++) { input[k + l * 4][i] = *lines[l]++; } } for (; k < 8; k++) { for (i = 0; i < comps; i++) { input[k + l * 4 + 12][i] = *lines[l]++; } } } #else // H.Morii - support for ARGB inputs for (l = 0; l < 4; l++) { for (k = 0; k < 4; k++) { input[k + l * 4][2] = *lines[l]++; input[k + l * 4][1] = *lines[l]++; input[k + l * 4][0] = *lines[l]++; if (comps == 4) input[k + l * 4][3] = *lines[l]++; } for (; k < 8; k++) { input[k + l * 4 + 12][2] = *lines[l]++; input[k + l * 4 + 12][1] = *lines[l]++; input[k + l * 4 + 12][0] = *lines[l]++; if (comps == 4) input[k + l * 4 + 12][3] = *lines[l]++; } } #endif /* Block layout: 00, 01, 02, 03, 08, 09, 0a, 0b 10, 11, 12, 13, 18, 19, 1a, 1b 04, 05, 06, 07, 0c, 0d, 0e, 0f 14, 15, 16, 17, 1c, 1d, 1e, 1f */ /* [dBorca] Stupidity flows forth from this */ l = N_TEXELS; trualpha = 0; if (comps == 4) { // Skip all transparent black texels l = 0; for (k = 0; k < N_TEXELS; k++) { // Test all components against 0 if (!ISTBLACK(input[k])) { // Texel is not transparent black COPY_4UBV(reord[l], input[k]); if (reord[l][ACOMP] < (255 - ALPHA_TS)) { // Non-opaque texel trualpha = !0; } l++; } } } #if 0 if (trualpha) { fxt1_quantize_ALPHA0(cc, input, reord, l); } else if (l == 0) { cc[0] = cc[1] = cc[2] = -1; cc[3] = 0; } else if (l < N_TEXELS) { fxt1_quantize_HI(cc, input, reord, l); } else { fxt1_quantize_CHROMA(cc, input); } (void)fxt1_quantize_ALPHA1; (void)fxt1_quantize_MIXED1; (void)fxt1_quantize_MIXED0; #else if (trualpha) { fxt1_quantize_ALPHA1(cc, input); } else if (l == 0) { cc[0] = cc[1] = cc[2] = 0xFFFFFFFFUL; cc[3] = 0; } else if (l < N_TEXELS) { fxt1_quantize_MIXED1(cc, input); } else { fxt1_quantize_MIXED0(cc, input); } (void)fxt1_quantize_ALPHA0; (void)fxt1_quantize_HI; (void)fxt1_quantize_CHROMA; #endif } TAPI int TAPIENTRY fxt1_encode(int width, int height, int comps, const void *source, int srcRowStride, void *dest, int destRowStride) { int x, y; const byte *data; dword *encoded = (dword *)dest; void *newSource = NULL; // Replicate image if width is not M8 or height is not M4 if ((width & 7) | (height & 3)) { int newWidth = (width + 7) & ~7; int newHeight = (height + 3) & ~3; newSource = malloc(comps * newWidth * newHeight * sizeof(byte *)); _mesa_upscale_teximage2d(width, height, newWidth, newHeight, comps, (const byte *)source, srcRowStride, (byte *)newSource); source = newSource; width = newWidth; height = newHeight; srcRowStride = comps * newWidth; } data = (const byte *)source; destRowStride = (destRowStride - width * 2) / 4; for (y = 0; y < height; y += 4) { unsigned int offs = 0 + (y + 0) * srcRowStride; for (x = 0; x < width; x += 8) { const byte *lines[4]; lines[0] = &data[offs]; lines[1] = lines[0] + srcRowStride; lines[2] = lines[1] + srcRowStride; lines[3] = lines[2] + srcRowStride; offs += 8 * comps; fxt1_quantize(encoded, lines, comps); // 128 bits per 8x4 block encoded += 4; } encoded += destRowStride; } if (newSource != NULL) { free(newSource); } return 0; } /* FXT1 decoder The decoder is based on GL_3DFX_texture_compression_FXT1 specification and serves as a concept for the encoder. */ // Lookup table for scaling 5-bit color up to 8-bit color static const byte _rgb_scale_5[] = { 0, 8, 16, 25, 33, 41, 49, 58, 66, 74, 82, 90, 99, 107, 115, 123, 132, 140, 148, 156, 165, 173, 181, 189, 197, 206, 214, 222, 230, 239, 247, 255 }; // Lookup table for scaling 6-bit color up to 8-bit color static const byte _rgb_scale_6[] = { 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 45, 49, 53, 57, 61, 65, 69, 73, 77, 81, 85, 89, 93, 97, 101, 105, 109, 113, 117, 121, 125, 130, 134, 138, 142, 146, 150, 154, 158, 162, 166, 170, 174, 178, 182, 186, 190, 194, 198, 202, 206, 210, 215, 219, 223, 227, 231, 235, 239, 243, 247, 251, 255 }; #define CC_SEL(cc, which) (((dword *)(cc))[(which) / 32] >> ((which) & 31)) #define UP5(c) _rgb_scale_5[(c) & 31] #define UP6(c, b) _rgb_scale_6[(((c) & 31) << 1) | ((b) & 1)] #define LERP(n, t, c0, c1) (((n) - (t)) * (c0) + (t) * (c1) + (n) / 2) / (n) #define ZERO_4UBV(v) *((dword *)(v)) = 0 static void fxt1_decode_1HI(const byte *code, int t, byte *rgba) { const dword *cc; t *= 3; cc = (const dword *)(code + t / 8); t = (cc[0] >> (t & 7)) & 7; if (t == 7) { ZERO_4UBV(rgba); } else { cc = (const dword *)(code + 12); if (t == 0) { rgba[BCOMP] = UP5(CC_SEL(cc, 0)); rgba[GCOMP] = UP5(CC_SEL(cc, 5)); rgba[RCOMP] = UP5(CC_SEL(cc, 10)); } else if (t == 6) { rgba[BCOMP] = UP5(CC_SEL(cc, 15)); rgba[GCOMP] = UP5(CC_SEL(cc, 20)); rgba[RCOMP] = UP5(CC_SEL(cc, 25)); } else { rgba[BCOMP] = LERP(6, t, UP5(CC_SEL(cc, 0)), UP5(CC_SEL(cc, 15))); rgba[GCOMP] = LERP(6, t, UP5(CC_SEL(cc, 5)), UP5(CC_SEL(cc, 20))); rgba[RCOMP] = LERP(6, t, UP5(CC_SEL(cc, 10)), UP5(CC_SEL(cc, 25))); } rgba[ACOMP] = 255; } } static void fxt1_decode_1CHROMA(const byte *code, int t, byte *rgba) { const dword *cc; dword kk; cc = (const dword *)code; if (t & 16) { cc++; t &= 15; } t = (cc[0] >> (t * 2)) & 3; t *= 15; cc = (const dword *)(code + 8 + t / 8); kk = cc[0] >> (t & 7); rgba[BCOMP] = UP5(kk); rgba[GCOMP] = UP5(kk >> 5); rgba[RCOMP] = UP5(kk >> 10); rgba[ACOMP] = 255; } static void fxt1_decode_1MIXED(const byte *code, int t, byte *rgba) { const dword *cc; int col[2][3]; int glsb, selb; cc = (const dword *)code; if (t & 16) { t &= 15; t = (cc[1] >> (t * 2)) & 3; // Col 2 col[0][BCOMP] = (*(const dword *)(code + 11)) >> 6; col[0][GCOMP] = CC_SEL(cc, 99); col[0][RCOMP] = CC_SEL(cc, 104); // Col 3 col[1][BCOMP] = CC_SEL(cc, 109); col[1][GCOMP] = CC_SEL(cc, 114); col[1][RCOMP] = CC_SEL(cc, 119); glsb = CC_SEL(cc, 126); selb = CC_SEL(cc, 33); } else { t = (cc[0] >> (t * 2)) & 3; // Col 0 col[0][BCOMP] = CC_SEL(cc, 64); col[0][GCOMP] = CC_SEL(cc, 69); col[0][RCOMP] = CC_SEL(cc, 74); // Col 1 col[1][BCOMP] = CC_SEL(cc, 79); col[1][GCOMP] = CC_SEL(cc, 84); col[1][RCOMP] = CC_SEL(cc, 89); glsb = CC_SEL(cc, 125); selb = CC_SEL(cc, 1); } if (CC_SEL(cc, 124) & 1) { // alpha[0] == 1 if (t == 3) { ZERO_4UBV(rgba); } else { if (t == 0) { rgba[BCOMP] = UP5(col[0][BCOMP]); rgba[GCOMP] = UP5(col[0][GCOMP]); rgba[RCOMP] = UP5(col[0][RCOMP]); } else if (t == 2) { rgba[BCOMP] = UP5(col[1][BCOMP]); rgba[GCOMP] = UP6(col[1][GCOMP], glsb); rgba[RCOMP] = UP5(col[1][RCOMP]); } else { rgba[BCOMP] = (UP5(col[0][BCOMP]) + UP5(col[1][BCOMP])) / 2; rgba[GCOMP] = (UP5(col[0][GCOMP]) + UP6(col[1][GCOMP], glsb)) / 2; rgba[RCOMP] = (UP5(col[0][RCOMP]) + UP5(col[1][RCOMP])) / 2; } rgba[ACOMP] = 255; } } else { // alpha[0] == 0 if (t == 0) { rgba[BCOMP] = UP5(col[0][BCOMP]); rgba[GCOMP] = UP6(col[0][GCOMP], glsb ^ selb); rgba[RCOMP] = UP5(col[0][RCOMP]); } else if (t == 3) { rgba[BCOMP] = UP5(col[1][BCOMP]); rgba[GCOMP] = UP6(col[1][GCOMP], glsb); rgba[RCOMP] = UP5(col[1][RCOMP]); } else { rgba[BCOMP] = LERP(3, t, UP5(col[0][BCOMP]), UP5(col[1][BCOMP])); rgba[GCOMP] = LERP(3, t, UP6(col[0][GCOMP], glsb ^ selb), UP6(col[1][GCOMP], glsb)); rgba[RCOMP] = LERP(3, t, UP5(col[0][RCOMP]), UP5(col[1][RCOMP])); } rgba[ACOMP] = 255; } } static void fxt1_decode_1ALPHA(const byte *code, int t, byte *rgba) { const dword *cc; cc = (const dword *)code; if (CC_SEL(cc, 124) & 1) { // lerp == 1 int col0[4]; if (t & 16) { t &= 15; t = (cc[1] >> (t * 2)) & 3; // Col 2 col0[BCOMP] = (*(const dword *)(code + 11)) >> 6; col0[GCOMP] = CC_SEL(cc, 99); col0[RCOMP] = CC_SEL(cc, 104); col0[ACOMP] = CC_SEL(cc, 119); } else { t = (cc[0] >> (t * 2)) & 3; // Col 0 col0[BCOMP] = CC_SEL(cc, 64); col0[GCOMP] = CC_SEL(cc, 69); col0[RCOMP] = CC_SEL(cc, 74); col0[ACOMP] = CC_SEL(cc, 109); } if (t == 0) { rgba[BCOMP] = UP5(col0[BCOMP]); rgba[GCOMP] = UP5(col0[GCOMP]); rgba[RCOMP] = UP5(col0[RCOMP]); rgba[ACOMP] = UP5(col0[ACOMP]); } else if (t == 3) { rgba[BCOMP] = UP5(CC_SEL(cc, 79)); rgba[GCOMP] = UP5(CC_SEL(cc, 84)); rgba[RCOMP] = UP5(CC_SEL(cc, 89)); rgba[ACOMP] = UP5(CC_SEL(cc, 114)); } else { rgba[BCOMP] = LERP(3, t, UP5(col0[BCOMP]), UP5(CC_SEL(cc, 79))); rgba[GCOMP] = LERP(3, t, UP5(col0[GCOMP]), UP5(CC_SEL(cc, 84))); rgba[RCOMP] = LERP(3, t, UP5(col0[RCOMP]), UP5(CC_SEL(cc, 89))); rgba[ACOMP] = LERP(3, t, UP5(col0[ACOMP]), UP5(CC_SEL(cc, 114))); } } else { // lerp == 0 if (t & 16) { cc++; t &= 15; } t = (cc[0] >> (t * 2)) & 3; if (t == 3) { ZERO_4UBV(rgba); } else { dword kk; cc = (const dword *)code; rgba[ACOMP] = UP5(cc[3] >> (t * 5 + 13)); t *= 15; cc = (const dword *)(code + 8 + t / 8); kk = cc[0] >> (t & 7); rgba[BCOMP] = UP5(kk); rgba[GCOMP] = UP5(kk >> 5); rgba[RCOMP] = UP5(kk >> 10); } } } TAPI void TAPIENTRY fxt1_decode_1(const void *texture, int stride, int i, int j, byte *rgba) { static void(*decode_1[]) (const byte *, int, byte *) = { fxt1_decode_1HI, // cc-high = "00?" fxt1_decode_1HI, // cc-high = "00?" fxt1_decode_1CHROMA, // cc-chroma = "010" fxt1_decode_1ALPHA, // alpha = "011" fxt1_decode_1MIXED, // mixed = "1??" fxt1_decode_1MIXED, // mixed = "1??" fxt1_decode_1MIXED, // mixed = "1??" fxt1_decode_1MIXED // mixed = "1??" }; const byte *code = (const byte *)texture + ((j / 4) * (stride / 8) + (i / 8)) * 16; int mode = CC_SEL(code, 125); int t = i & 7; if (t & 4) { t += 12; } t += (j & 3) * 4; decode_1[mode](code, t, rgba); #if VERBOSE { extern int cc_chroma; extern int cc_alpha; extern int cc_high; extern int cc_mixed; static int *cctype[] = { &cc_high, &cc_high, &cc_chroma, &cc_alpha, &cc_mixed, &cc_mixed, &cc_mixed, &cc_mixed }; (*cctype[mode])++; } #endif }