// File: crn_etc.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #include "../inc/crnlib.h" #include "crn_dxt.h" namespace crnlib { enum etc_constants { cETC1BytesPerBlock = 8U, cETC1SelectorBits = 2U, cETC1SelectorValues = 1U << cETC1SelectorBits, cETC1SelectorMask = cETC1SelectorValues - 1U, cETC1BlockShift = 2U, cETC1BlockSize = 1U << cETC1BlockShift, cETC1LSBSelectorIndicesBitOffset = 0, cETC1MSBSelectorIndicesBitOffset = 16, cETC1FlipBitOffset = 32, cETC1DiffBitOffset = 33, cETC1IntenModifierNumBits = 3, cETC1IntenModifierValues = 1 << cETC1IntenModifierNumBits, cETC1RightIntenModifierTableBitOffset = 34, cETC1LeftIntenModifierTableBitOffset = 37, // Base+Delta encoding (5 bit bases, 3 bit delta) cETC1BaseColorCompNumBits = 5, cETC1BaseColorCompMax = 1 << cETC1BaseColorCompNumBits, cETC1DeltaColorCompNumBits = 3, cETC1DeltaColorComp = 1 << cETC1DeltaColorCompNumBits, cETC1DeltaColorCompMax = 1 << cETC1DeltaColorCompNumBits, cETC1BaseColor5RBitOffset = 59, cETC1BaseColor5GBitOffset = 51, cETC1BaseColor5BBitOffset = 43, cETC1DeltaColor3RBitOffset = 56, cETC1DeltaColor3GBitOffset = 48, cETC1DeltaColor3BBitOffset = 40, // Absolute (non-delta) encoding (two 4-bit per component bases) cETC1AbsColorCompNumBits = 4, cETC1AbsColorCompMax = 1 << cETC1AbsColorCompNumBits, cETC1AbsColor4R1BitOffset = 60, cETC1AbsColor4G1BitOffset = 52, cETC1AbsColor4B1BitOffset = 44, cETC1AbsColor4R2BitOffset = 56, cETC1AbsColor4G2BitOffset = 48, cETC1AbsColor4B2BitOffset = 40, cETC1ColorDeltaMin = -4, cETC1ColorDeltaMax = 3, // Delta3: // 0 1 2 3 4 5 6 7 // 000 001 010 011 100 101 110 111 // 0 1 2 3 -4 -3 -2 -1 }; extern const int g_etc1_inten_tables[cETC1IntenModifierValues][cETC1SelectorValues]; extern const uint8 g_etc1_to_selector_index[cETC1SelectorValues]; extern const uint8 g_selector_index_to_etc1[cETC1SelectorValues]; struct etc1_coord2 { uint8 m_x, m_y; }; extern const etc1_coord2 g_etc1_pixel_coords[2][2][8]; // [flipped][subblock][subblock_pixel] struct etc1_block { // big endian uint64: // bit ofs: 56 48 40 32 24 16 8 0 // byte ofs: b0, b1, b2, b3, b4, b5, b6, b7 union { uint64 m_uint64; uint8 m_bytes[8]; }; uint8 m_low_color[2]; uint8 m_high_color[2]; enum { cNumSelectorBytes = 4 }; uint8 m_selectors[cNumSelectorBytes]; inline void clear() { utils::zero_this(this); } inline uint get_general_bits(uint ofs, uint num) const { CRNLIB_ASSERT((ofs + num) <= 64U); CRNLIB_ASSERT(num && (num < 32U)); return (utils::read_be64(&m_uint64) >> ofs) & ((1UL << num) - 1UL); } inline void set_general_bits(uint ofs, uint num, uint bits) { CRNLIB_ASSERT((ofs + num) <= 64U); CRNLIB_ASSERT(num && (num < 32U)); uint64 x = utils::read_be64(&m_uint64); uint64 msk = ((1ULL << static_cast(num)) - 1ULL) << static_cast(ofs); x &= ~msk; x |= (static_cast(bits) << static_cast(ofs)); utils::write_be64(&m_uint64, x); } inline uint get_byte_bits(uint ofs, uint num) const { CRNLIB_ASSERT((ofs + num) <= 64U); CRNLIB_ASSERT(num && (num <= 8U)); CRNLIB_ASSERT((ofs >> 3) == ((ofs + num - 1) >> 3)); const uint byte_ofs = 7 - (ofs >> 3); const uint byte_bit_ofs = ofs & 7; return (m_bytes[byte_ofs] >> byte_bit_ofs) & ((1 << num) - 1); } inline void set_byte_bits(uint ofs, uint num, uint bits) { CRNLIB_ASSERT((ofs + num) <= 64U); CRNLIB_ASSERT(num && (num < 32U)); CRNLIB_ASSERT((ofs >> 3) == ((ofs + num - 1) >> 3)); CRNLIB_ASSERT(bits < (1U << num)); const uint byte_ofs = 7 - (ofs >> 3); const uint byte_bit_ofs = ofs & 7; const uint mask = (1 << num) - 1; m_bytes[byte_ofs] &= ~(mask << byte_bit_ofs); m_bytes[byte_ofs] |= (bits << byte_bit_ofs); } // false = left/right subblocks // true = upper/lower subblocks inline bool get_flip_bit() const { return (m_bytes[3] & 1) != 0; } inline void set_flip_bit(bool flip) { m_bytes[3] &= ~1; m_bytes[3] |= static_cast(flip); } inline bool get_diff_bit() const { return (m_bytes[3] & 2) != 0; } inline void set_diff_bit(bool diff) { m_bytes[3] &= ~2; m_bytes[3] |= (static_cast(diff) << 1); } // Returns intensity modifier table (0-7) used by subblock subblock_id. // subblock_id=0 left/top (CW 1), 1=right/bottom (CW 2) inline uint get_inten_table(uint subblock_id) const { CRNLIB_ASSERT(subblock_id < 2); const uint ofs = subblock_id ? 2 : 5; return (m_bytes[3] >> ofs) & 7; } // Sets intensity modifier table (0-7) used by subblock subblock_id (0 or 1) inline void set_inten_table(uint subblock_id, uint t) { CRNLIB_ASSERT(subblock_id < 2); CRNLIB_ASSERT(t < 8); const uint ofs = subblock_id ? 2 : 5; m_bytes[3] &= ~(7 << ofs); m_bytes[3] |= (t << ofs); } // Returned selector value ranges from 0-3 and is a direct index into g_etc1_inten_tables. inline uint get_selector(uint x, uint y) const { CRNLIB_ASSERT((x | y) < 4); const uint bit_index = x * 4 + y; const uint byte_bit_ofs = bit_index & 7; const uint8 *p = &m_bytes[7 - (bit_index >> 3)]; const uint lsb = (p[0] >> byte_bit_ofs) & 1; const uint msb = (p[-2] >> byte_bit_ofs) & 1; const uint val = lsb | (msb << 1); return g_etc1_to_selector_index[val]; } // Selector "val" ranges from 0-3 and is a direct index into g_etc1_inten_tables. inline void set_selector(uint x, uint y, uint val) { CRNLIB_ASSERT((x | y | val) < 4); const uint bit_index = x * 4 + y; uint8 *p = &m_bytes[7 - (bit_index >> 3)]; const uint byte_bit_ofs = bit_index & 7; const uint mask = 1 << byte_bit_ofs; const uint etc1_val = g_selector_index_to_etc1[val]; const uint lsb = etc1_val & 1; const uint msb = etc1_val >> 1; p[0] &= ~mask; p[0] |= (lsb << byte_bit_ofs); p[-2] &= ~mask; p[-2] |= (msb << byte_bit_ofs); } inline void set_base4_color(uint idx, uint16 c) { if (idx) { set_byte_bits(cETC1AbsColor4R2BitOffset, 4, (c >> 8) & 15); set_byte_bits(cETC1AbsColor4G2BitOffset, 4, (c >> 4) & 15); set_byte_bits(cETC1AbsColor4B2BitOffset, 4, c & 15); } else { set_byte_bits(cETC1AbsColor4R1BitOffset, 4, (c >> 8) & 15); set_byte_bits(cETC1AbsColor4G1BitOffset, 4, (c >> 4) & 15); set_byte_bits(cETC1AbsColor4B1BitOffset, 4, c & 15); } } inline uint16 get_base4_color(uint idx) const { uint r, g, b; if (idx) { r = get_byte_bits(cETC1AbsColor4R2BitOffset, 4); g = get_byte_bits(cETC1AbsColor4G2BitOffset, 4); b = get_byte_bits(cETC1AbsColor4B2BitOffset, 4); } else { r = get_byte_bits(cETC1AbsColor4R1BitOffset, 4); g = get_byte_bits(cETC1AbsColor4G1BitOffset, 4); b = get_byte_bits(cETC1AbsColor4B1BitOffset, 4); } return static_cast(b | (g << 4U) | (r << 8U)); } inline void set_base5_color(uint16 c) { set_byte_bits(cETC1BaseColor5RBitOffset, 5, (c >> 10) & 31); set_byte_bits(cETC1BaseColor5GBitOffset, 5, (c >> 5) & 31); set_byte_bits(cETC1BaseColor5BBitOffset, 5, c & 31); } inline uint16 get_base5_color() const { const uint r = get_byte_bits(cETC1BaseColor5RBitOffset, 5); const uint g = get_byte_bits(cETC1BaseColor5GBitOffset, 5); const uint b = get_byte_bits(cETC1BaseColor5BBitOffset, 5); return static_cast(b | (g << 5U) | (r << 10U)); } void set_delta3_color(uint16 c) { set_byte_bits(cETC1DeltaColor3RBitOffset, 3, (c >> 6) & 7); set_byte_bits(cETC1DeltaColor3GBitOffset, 3, (c >> 3) & 7); set_byte_bits(cETC1DeltaColor3BBitOffset, 3, c & 7); } inline uint16 get_delta3_color() const { const uint r = get_byte_bits(cETC1DeltaColor3RBitOffset, 3); const uint g = get_byte_bits(cETC1DeltaColor3GBitOffset, 3); const uint b = get_byte_bits(cETC1DeltaColor3BBitOffset, 3); return static_cast(b | (g << 3U) | (r << 6U)); } // Base color 5 static uint16 pack_color5(const color_quad_u8& color, bool scaled, uint bias = 127U); static uint16 pack_color5(uint r, uint g, uint b, bool scaled, uint bias = 127U); static color_quad_u8 unpack_color5(uint16 packed_color5, bool scaled, uint alpha = 255U); static void unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color, bool scaled); static bool unpack_color5(color_quad_u8& result, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha = 255U); static bool unpack_color5(uint& r, uint& g, uint& b, uint16 packed_color5, uint16 packed_delta3, bool scaled, uint alpha = 255U); // Delta color 3 // Inputs range from -4 to 3 (cETC1ColorDeltaMin to cETC1ColorDeltaMax) static uint16 pack_delta3(const color_quad_i16& color); static uint16 pack_delta3(int r, int g, int b); // Results range from -4 to 3 (cETC1ColorDeltaMin to cETC1ColorDeltaMax) static color_quad_i16 unpack_delta3(uint16 packed_delta3); static void unpack_delta3(int& r, int& g, int& b, uint16 packed_delta3); // Abs color 4 static uint16 pack_color4(const color_quad_u8& color, bool scaled, uint bias = 127U); static uint16 pack_color4(uint r, uint g, uint b, bool scaled, uint bias = 127U); static color_quad_u8 unpack_color4(uint16 packed_color4, bool scaled, uint alpha = 255U); static void unpack_color4(uint& r, uint& g, uint& b, uint16 packed_color4, bool scaled); // subblock colors static void get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint table_idx); static bool get_diff_subblock_colors(color_quad_u8* pDst, uint16 packed_color5, uint16 packed_delta3, uint table_idx); static void get_abs_subblock_colors(color_quad_u8* pDst, uint16 packed_color4, uint table_idx); static inline void unscaled_to_scaled_color(color_quad_u8& dst, const color_quad_u8& src, bool color4) { if (color4) { dst.r = src.r | (src.r << 4); dst.g = src.g | (src.g << 4); dst.b = src.b | (src.b << 4); } else { dst.r = (src.r >> 2) | (src.r << 3); dst.g = (src.g >> 2) | (src.g << 3); dst.b = (src.b >> 2) | (src.b << 3); } dst.a = src.a; } }; CRNLIB_DEFINE_BITWISE_COPYABLE(etc1_block); // Returns false if the block is invalid (it will still be unpacked with clamping). bool unpack_etc1(const etc1_block& block, color_quad_u8 *pDst, bool preserve_alpha = false); enum crn_etc_quality { cCRNETCQualityFast, cCRNETCQualityMedium, cCRNETCQualitySlow, cCRNETCQualityTotal, cCRNETCQualityForceDWORD = 0xFFFFFFFF }; struct crn_etc1_pack_params { crn_etc_quality m_quality; bool m_perceptual; bool m_dithering; inline crn_etc1_pack_params() { clear(); } void clear() { m_quality = cCRNETCQualitySlow; m_perceptual = true; m_dithering = false; } }; struct etc1_solution_coordinates { inline etc1_solution_coordinates() : m_unscaled_color(0, 0, 0, 0), m_inten_table(0), m_color4(false) { } inline etc1_solution_coordinates(uint r, uint g, uint b, uint inten_table, bool color4) : m_unscaled_color(r, g, b, 255), m_inten_table(inten_table), m_color4(color4) { } inline etc1_solution_coordinates(const color_quad_u8& c, uint inten_table, bool color4) : m_unscaled_color(c), m_inten_table(inten_table), m_color4(color4) { } inline etc1_solution_coordinates(const etc1_solution_coordinates& other) { *this = other; } inline etc1_solution_coordinates& operator= (const etc1_solution_coordinates& rhs) { m_unscaled_color = rhs.m_unscaled_color; m_inten_table = rhs.m_inten_table; m_color4 = rhs.m_color4; return *this; } inline void clear() { m_unscaled_color.clear(); m_inten_table = 0; m_color4 = false; } inline color_quad_u8 get_scaled_color() const { int br, bg, bb; if (m_color4) { br = m_unscaled_color.r | (m_unscaled_color.r << 4); bg = m_unscaled_color.g | (m_unscaled_color.g << 4); bb = m_unscaled_color.b | (m_unscaled_color.b << 4); } else { br = (m_unscaled_color.r >> 2) | (m_unscaled_color.r << 3); bg = (m_unscaled_color.g >> 2) | (m_unscaled_color.g << 3); bb = (m_unscaled_color.b >> 2) | (m_unscaled_color.b << 3); } return color_quad_u8(br, bg, bb); } inline void get_block_colors(color_quad_u8* pBlock_colors) { int br, bg, bb; if (m_color4) { br = m_unscaled_color.r | (m_unscaled_color.r << 4); bg = m_unscaled_color.g | (m_unscaled_color.g << 4); bb = m_unscaled_color.b | (m_unscaled_color.b << 4); } else { br = (m_unscaled_color.r >> 2) | (m_unscaled_color.r << 3); bg = (m_unscaled_color.g >> 2) | (m_unscaled_color.g << 3); bb = (m_unscaled_color.b >> 2) | (m_unscaled_color.b << 3); } const int* pInten_table = g_etc1_inten_tables[m_inten_table]; pBlock_colors[0].set(br + pInten_table[0], bg + pInten_table[0], bb + pInten_table[0]); pBlock_colors[1].set(br + pInten_table[1], bg + pInten_table[1], bb + pInten_table[1]); pBlock_colors[2].set(br + pInten_table[2], bg + pInten_table[2], bb + pInten_table[2]); pBlock_colors[3].set(br + pInten_table[3], bg + pInten_table[3], bb + pInten_table[3]); } color_quad_u8 m_unscaled_color; uint m_inten_table; bool m_color4; }; class etc1_optimizer { CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(etc1_optimizer); public: etc1_optimizer() { clear(); } void clear() { m_pParams = NULL; m_pResult = NULL; m_pSorted_luma = NULL; m_pSorted_luma_indices = NULL; } struct params : crn_etc1_pack_params { params() { clear(); } params(const crn_etc1_pack_params& base_params) : crn_etc1_pack_params(base_params) { clear_optimizer_params(); } void clear() { crn_etc1_pack_params::clear(); clear_optimizer_params(); } void clear_optimizer_params() { m_num_src_pixels = 0; m_pSrc_pixels = 0; m_use_color4 = false; static const int s_default_scan_delta[] = { 0 }; m_pScan_deltas = s_default_scan_delta; m_scan_delta_size = 1; m_base_color5.clear(); m_constrain_against_base_color5 = false; } uint m_num_src_pixels; const color_quad_u8* m_pSrc_pixels; bool m_use_color4; const int* m_pScan_deltas; uint m_scan_delta_size; color_quad_u8 m_base_color5; bool m_constrain_against_base_color5; }; struct results { uint64 m_error; color_quad_u8 m_block_color_unscaled; uint m_block_inten_table; uint m_n; uint8* m_pSelectors; bool m_block_color4; inline results& operator= (const results& rhs) { m_block_color_unscaled = rhs.m_block_color_unscaled; m_block_color4 = rhs.m_block_color4; m_block_inten_table = rhs.m_block_inten_table; m_error = rhs.m_error; CRNLIB_ASSERT(m_n == rhs.m_n); memcpy(m_pSelectors, rhs.m_pSelectors, rhs.m_n); return *this; } }; void init(const params& params, results& result); bool compute(); private: struct potential_solution { potential_solution() : m_coords(), m_error(cUINT64_MAX), m_valid(false) { } etc1_solution_coordinates m_coords; crnlib::vector m_selectors; uint64 m_error; bool m_valid; void clear() { m_coords.clear(); m_selectors.resize(0); m_error = cUINT64_MAX; m_valid = false; } bool are_selectors_all_equal() const { if (m_selectors.empty()) return false; const uint s = m_selectors[0]; for (uint i = 1; i < m_selectors.size(); i++) if (m_selectors[i] != s) return false; return true; } }; const params* m_pParams; results* m_pResult; int m_limit; vec3F m_avg_color; int m_br, m_bg, m_bb; crnlib::vector m_luma; crnlib::vector m_sorted_luma[2]; const uint32* m_pSorted_luma_indices; uint32* m_pSorted_luma; crnlib::vector m_selectors; crnlib::vector m_best_selectors; potential_solution m_best_solution; potential_solution m_trial_solution; crnlib::vector m_temp_selectors; bool evaluate_solution(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution); bool evaluate_solution_fast(const etc1_solution_coordinates& coords, potential_solution& trial_solution, potential_solution* pBest_solution); }; struct pack_etc1_block_context { etc1_optimizer m_optimizer; }; void pack_etc1_block_init(); uint64 pack_etc1_block(etc1_block& block, const color_quad_u8* pSrc_pixels, crn_etc1_pack_params& pack_params, pack_etc1_block_context& context); } // namespace crnlib