// File: crn_qdxt.cpp // See Copyright Notice and license at the end of inc/crnlib.h #include "crn_core.h" #include "crn_qdxt1.h" #include "crn_dxt1.h" #include "crn_dxt_fast.h" #include "crn_image_utils.h" #include "crn_dxt_hc_common.h" #define GENERATE_DEBUG_IMAGES 0 namespace crnlib { qdxt1::qdxt1(task_pool& task_pool) : m_pTask_pool(&task_pool), m_main_thread_id(0), m_canceled(false), m_progress_start(0), m_progress_range(100), m_num_blocks(0), m_pBlocks(NULL), m_pDst_elements(NULL), m_elements_per_block(0), m_max_selector_clusters(0), m_prev_percentage_complete(-1), m_selector_clusterizer(task_pool) { } qdxt1::~qdxt1() { } void qdxt1::clear() { m_main_thread_id = 0; m_num_blocks = 0; m_pBlocks = 0; m_pDst_elements = NULL; m_elements_per_block = 0; m_params.clear(); m_endpoint_clusterizer.clear(); m_endpoint_cluster_indices.clear(); m_max_selector_clusters = 0; m_canceled = false; m_progress_start = 0; m_progress_range = 100; m_selector_clusterizer.clear(); for (uint i = 0; i <= qdxt1_params::cMaxQuality; i++) m_cached_selector_cluster_indices[i].clear(); m_cluster_hash.clear(); m_prev_percentage_complete = -1; } bool qdxt1::init(uint n, const dxt_pixel_block* pBlocks, const qdxt1_params& params) { clear(); CRNLIB_ASSERT(n && pBlocks); m_main_thread_id = crn_get_current_thread_id(); m_num_blocks = n; m_pBlocks = pBlocks; m_params = params; m_endpoint_clusterizer.reserve_training_vecs(m_num_blocks); m_progress_start = 0; m_progress_range = 75; const bool debugging = false; image_u8 debug_img; if ((m_params.m_hierarchical) && (m_params.m_num_mips)) { vec6F_clusterizer::training_vec_array& training_vecs = m_endpoint_clusterizer.get_training_vecs(); training_vecs.resize(m_num_blocks); uint encoding_hist[cNumChunkEncodings]; utils::zero_object(encoding_hist); uint total_processed_blocks = 0; uint next_progress_threshold = 512; for (uint level = 0; level < m_params.m_num_mips; level++) { const qdxt1_params::mip_desc& level_desc = m_params.m_mip_desc[level]; const uint num_chunks_x = (level_desc.m_block_width + cChunkBlockWidth - 1) / cChunkBlockWidth; const uint num_chunks_y = (level_desc.m_block_height + cChunkBlockHeight - 1) / cChunkBlockHeight; const uint level_width = level_desc.m_block_width * 4; const uint level_height = level_desc.m_block_height * 4; if (debugging) debug_img.resize(num_chunks_x * cChunkPixelWidth, num_chunks_y * cChunkPixelHeight); float adaptive_tile_color_psnr_derating = 1.5f; // was 2.4f if ((level) && (adaptive_tile_color_psnr_derating > .25f)) { adaptive_tile_color_psnr_derating = math::maximum(.25f, adaptive_tile_color_psnr_derating / powf(3.1f, static_cast(level))); // was 3.0f } for (uint chunk_y = 0; chunk_y < num_chunks_y; chunk_y++) { for (uint chunk_x = 0; chunk_x < num_chunks_x; chunk_x++) { color_quad_u8 chunk_pixels[cChunkPixelWidth * cChunkPixelHeight]; for (uint y = 0; y < cChunkPixelHeight; y++) { const uint pix_y = math::minimum(chunk_y * cChunkPixelHeight + y, level_height - 1); const uint outer_block_index = level_desc.m_first_block + ((pix_y >> 2) * level_desc.m_block_width); for (uint x = 0; x < cChunkPixelWidth; x++) { const uint pix_x = math::minimum(chunk_x * cChunkPixelWidth + x, level_width - 1); const uint block_index = outer_block_index + (pix_x >> 2); const dxt_pixel_block& block = m_pBlocks[block_index]; const color_quad_u8& p = block.m_pixels[pix_y & 3][pix_x & 3]; chunk_pixels[x + y * 8] = p; } } struct layout_results { uint m_low_color; uint m_high_color; uint8 m_selectors[cChunkPixelWidth * cChunkPixelHeight]; uint64 m_error; //float m_penalty; }; layout_results layouts[cNumChunkTileLayouts]; for (uint l = 0; l < cNumChunkTileLayouts; l++) { const uint width = g_chunk_tile_layouts[l].m_width; const uint height = g_chunk_tile_layouts[l].m_height; const uint x_ofs = g_chunk_tile_layouts[l].m_x_ofs; const uint y_ofs = g_chunk_tile_layouts[l].m_y_ofs; color_quad_u8 layout_pixels[cChunkPixelWidth * cChunkPixelHeight]; for (uint y = 0; y < height; y++) for (uint x = 0; x < width; x++) layout_pixels[x + y * width] = chunk_pixels[(x_ofs + x) + (y_ofs + y) * cChunkPixelWidth]; const uint n = width * height; dxt_fast::compress_color_block(n, layout_pixels, layouts[l].m_low_color, layouts[l].m_high_color, layouts[l].m_selectors); color_quad_u8 c[4]; dxt1_block::get_block_colors(c, static_cast(layouts[l].m_low_color), static_cast(layouts[l].m_high_color)); uint64 error = 0; for (uint i = 0; i < n; i++) error += color::elucidian_distance(layout_pixels[i], c[layouts[l].m_selectors[i]], false); layouts[l].m_error = error; #if 0 if ((width > 4) || (height > 4)) { const uint dist = color::elucidian_distance( dxt1_block::unpack_color(static_cast(layouts[l].m_low_color), true), dxt1_block::unpack_color(static_cast(layouts[l].m_high_color), true), false); layouts[l].m_penalty = math::clamp((sqrt((float)dist) - 75.0f) / 150.0f, 0.0f, 2.0f); if ((width == 8) && (height == 8)) layouts[l].m_penalty *= 2.0f; } else { layouts[l].m_penalty = 0.0f; } #endif } double best_peak_snr = -1.0f; uint best_encoding = 0; for (uint e = 0; e < cNumChunkEncodings; e++) { const chunk_encoding_desc& encoding_desc = g_chunk_encodings[e]; double total_error = 0; for (uint t = 0; t < encoding_desc.m_num_tiles; t++) total_error += (double)layouts[encoding_desc.m_tiles[t].m_layout_index].m_error; //double mean_squared = total_error * (1.0f / (16.0f * 3.0f)); double mean_squared = total_error * (1.0f / (64.0f * 3.0f)); double root_mean_squared = sqrt(mean_squared); double peak_snr = 999999.0f; if (mean_squared) peak_snr = math::clamp(log10(255.0f / root_mean_squared) * 20.0f, 0.0f, 500.0f); //if (level) // adaptive_tile_color_psnr_derating = math::lerp(adaptive_tile_color_psnr_derating * .5f, .3f, math::maximum((level - 1) / float(m_params.m_num_mips - 2), 1.0f)); float color_derating = math::lerp( 0.0f, adaptive_tile_color_psnr_derating, (g_chunk_encodings[e].m_num_tiles - 1) / 3.0f ); peak_snr = peak_snr - color_derating; //for (uint t = 0; t < encoding_desc.m_num_tiles; t++) // peak_snr -= (double)layouts[encoding_desc.m_tiles[t].m_layout_index].m_penalty; if (peak_snr > best_peak_snr) { best_peak_snr = peak_snr; best_encoding = e; } } encoding_hist[best_encoding]++; const chunk_encoding_desc& encoding_desc = g_chunk_encodings[best_encoding]; for (uint t = 0; t < encoding_desc.m_num_tiles; t++) { const chunk_tile_desc& tile_desc = encoding_desc.m_tiles[t]; uint layout_index = tile_desc.m_layout_index; const layout_results& layout = layouts[layout_index]; color_quad_u8 c[4]; if (debugging) dxt1_block::get_block_colors(c, static_cast(layout.m_low_color), static_cast(layout.m_high_color)); color_quad_u8 tile_pixels[cChunkPixelWidth * cChunkPixelHeight]; for (uint y = 0; y < tile_desc.m_height; y++) { const uint pix_y = y + tile_desc.m_y_ofs; for (uint x = 0; x < tile_desc.m_width; x++) { const uint pix_x = x + tile_desc.m_x_ofs; tile_pixels[x + y * tile_desc.m_width] = chunk_pixels[pix_x + pix_y * cChunkPixelWidth]; if (debugging) debug_img(chunk_x * 8 + pix_x, chunk_y * 8 + pix_y) = c[layout.m_selectors[x + y * tile_desc.m_width]]; } } color_quad_u8 l, h; dxt_fast::find_representative_colors(tile_desc.m_width * tile_desc.m_height, tile_pixels, l, h); //const uint dist = color::color_distance(m_params.m_perceptual, l, h, false); const uint dist = color::elucidian_distance(l, h, false); const uint cColorDistToWeight = 5000; const uint cMaxWeight = 8; uint weight = math::clamp(dist / cColorDistToWeight, 1, cMaxWeight); vec6F ev; ev[0] = l[0]; ev[1] = l[1]; ev[2] = l[2]; ev[3] = h[0]; ev[4] = h[1]; ev[5] = h[2]; for (uint y = 0; y < (tile_desc.m_height >> 2); y++) { uint block_y = chunk_y * cChunkBlockHeight + y + (tile_desc.m_y_ofs >> 2); if (block_y >= level_desc.m_block_height) continue; for (uint x = 0; x < (tile_desc.m_width >> 2); x++) { uint block_x = chunk_x * cChunkBlockWidth + x + (tile_desc.m_x_ofs >> 2); if (block_x >= level_desc.m_block_width) break; uint block_index = level_desc.m_first_block + block_x + block_y * level_desc.m_block_width; training_vecs[block_index].first = ev; training_vecs[block_index].second = weight; total_processed_blocks++; //if (debugging) //{ // debug_img(block_x, block_y) = l; // debug_img(block_x + level_desc.m_block_width, block_y) = h; //} } // x } // y } //t if (total_processed_blocks >= next_progress_threshold) { next_progress_threshold += 512; if (!update_progress(total_processed_blocks, m_num_blocks - 1)) return false; } } // chunk_x } // chunk_y #if GENERATE_DEBUG_IMAGES if (debugging) image_utils::write_to_file(dynamic_string(cVarArg, "debug_%u.tga", level).get_ptr(), debug_img, image_utils::cWriteFlagIgnoreAlpha); #endif } // level #if 0 trace("chunk encoding hist: "); for (uint i = 0; i < cNumChunkEncodings; i++) trace("%u ", encoding_hist[i]); trace("\n"); #endif } else { for (uint block_index = 0; block_index < m_num_blocks; block_index++) { if ((block_index & 511) == 0) { if (!update_progress(block_index, m_num_blocks - 1)) return false; } color_quad_u8 l, h; dxt_fast::find_representative_colors(cDXTBlockSize * cDXTBlockSize, &m_pBlocks[block_index].m_pixels[0][0], l, h); //const uint dist = color::color_distance(m_params.m_perceptual, l, h, false); const uint dist = color::elucidian_distance(l, h, false); const uint cColorDistToWeight = 5000; const uint cMaxWeight = 8; uint weight = math::clamp(dist / cColorDistToWeight, 1, cMaxWeight); vec6F ev; ev[0] = l[0]; ev[1] = l[1]; ev[2] = l[2]; ev[3] = h[0]; ev[4] = h[1]; ev[5] = h[2]; m_endpoint_clusterizer.add_training_vec(ev, weight); } } const uint cMaxEndpointClusters = 65535U; m_progress_start = 75; m_progress_range = 20; if (!m_endpoint_clusterizer.generate_codebook(cMaxEndpointClusters, generate_codebook_progress_callback, this)) return false; crnlib::hash_map selector_hash; m_progress_start = 95; m_progress_range = 5; for (uint block_index = 0; block_index < m_num_blocks; block_index++) { if ((block_index & 511) == 0) { if (!update_progress(block_index, m_num_blocks - 1)) return false; } dxt1_block dxt_blk; dxt_fast::compress_color_block(&dxt_blk, &m_pBlocks[block_index].m_pixels[0][0]); uint selectors = dxt_blk.m_selectors[0] | (dxt_blk.m_selectors[1] << 8) | (dxt_blk.m_selectors[2] << 16) | (dxt_blk.m_selectors[3] << 24); selector_hash.insert(selectors); } m_max_selector_clusters = selector_hash.size() + 128; // trace("max endpoint clusters: %u\n", m_endpoint_clusterizer.get_codebook_size()); // trace("max selector clusters: %u\n", m_max_selector_clusters); update_progress(1, 1); return true; } bool qdxt1::update_progress(uint value, uint max_value) { if (!m_params.m_pProgress_func) return true; uint percentage = max_value ? (m_progress_start + (value * m_progress_range + (max_value / 2)) / max_value) : 100; if ((int)percentage == m_prev_percentage_complete) return true; m_prev_percentage_complete = percentage; if (!m_params.m_pProgress_func(m_params.m_progress_start + (percentage * m_params.m_progress_range) / 100U, m_params.m_pProgress_data)) { m_canceled = true; return false; } return true; } void qdxt1::pack_endpoints_task(uint64 data, void* pData_ptr) { pData_ptr; const uint thread_index = static_cast(data); crnlib::vector cluster_pixels; cluster_pixels.reserve(1024); crnlib::vector selectors; selectors.reserve(1024); dxt1_endpoint_optimizer optimizer; dxt1_endpoint_optimizer::params p; dxt1_endpoint_optimizer::results r; p.m_quality = m_params.m_dxt_quality; p.m_use_alpha_blocks = m_params.m_use_alpha_blocks; p.m_dxt1a_alpha_threshold = m_params.m_dxt1a_alpha_threshold; p.m_perceptual = m_params.m_perceptual; uint cluster_index_progress_mask = math::next_pow2(m_endpoint_cluster_indices.size() / 100); cluster_index_progress_mask /= 2; cluster_index_progress_mask = math::maximum(cluster_index_progress_mask, 8); cluster_index_progress_mask -= 1; cluster_id cid; const crnlib::vector& indices = cid.m_cells; for (uint cluster_index = 0; cluster_index < m_endpoint_cluster_indices.size(); cluster_index++) { if (m_canceled) return; if ((cluster_index & cluster_index_progress_mask) == 0) { if (crn_get_current_thread_id() == m_main_thread_id) { if (!update_progress(cluster_index, m_endpoint_cluster_indices.size() - 1)) return; } } if (m_pTask_pool->get_num_threads()) { if ((cluster_index % (m_pTask_pool->get_num_threads() + 1)) != thread_index) continue; } const crnlib::vector& cluster_indices = m_endpoint_cluster_indices[cluster_index]; selectors.resize(cluster_indices.size() * cDXTBlockSize * cDXTBlockSize); bool found = false; uint32 found_endpoints = 0; cid.set(cluster_indices); { scoped_spinlock lock(m_cluster_hash_lock); cluster_hash::const_iterator it(m_cluster_hash.find(cid)); if (it != m_cluster_hash.end()) { CRNLIB_ASSERT(cid == it->first); found = true; found_endpoints = it->second; } } if (found) { const uint16 low_color = static_cast(found_endpoints); const uint16 high_color = static_cast((found_endpoints >> 16U)); color_quad_u8 block_colors[4]; dxt1_block::get_block_colors(block_colors, low_color, high_color); const bool is_alpha_block = (low_color <= high_color); for (uint block_iter = 0; block_iter < indices.size(); block_iter++) { const uint block_index = indices[block_iter]; const color_quad_u8* pSrc_pixels = &m_pBlocks[block_index].m_pixels[0][0]; for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) { dxt1_block& dxt_block = get_block(block_index); dxt_block.set_low_color(static_cast(low_color)); dxt_block.set_high_color(static_cast(high_color)); uint mask = 0; for (int i = 15; i >= 0; i--) { mask <<= 2; const color_quad_u8& c = pSrc_pixels[i]; uint dist0 = color::color_distance(m_params.m_perceptual, c, block_colors[0], false); uint dist1 = color::color_distance(m_params.m_perceptual, c, block_colors[1], false); uint dist2 = color::color_distance(m_params.m_perceptual, c, block_colors[2], false); uint selector = 0, best_dist = dist0; if (dist1 < best_dist) { selector = 1; best_dist = dist1; } if (dist2 < best_dist) { selector = 2; best_dist = dist2; } if (!is_alpha_block) { uint dist3 = color::color_distance(m_params.m_perceptual, c, block_colors[3], false); if (dist3 < best_dist) { selector = 3; } } else { if (c.a < m_params.m_dxt1a_alpha_threshold) selector = 3; } mask |= selector; } dxt_block.m_selectors[0] = static_cast(mask & 0xFF); dxt_block.m_selectors[1] = static_cast((mask >> 8) & 0xFF); dxt_block.m_selectors[2] = static_cast((mask >> 16) & 0xFF); dxt_block.m_selectors[3] = static_cast((mask >> 24) & 0xFF); } } } else { cluster_pixels.resize(indices.size() * cDXTBlockSize * cDXTBlockSize); color_quad_u8* pDst = &cluster_pixels[0]; bool has_alpha_pixels = false; for (uint block_iter = 0; block_iter < indices.size(); block_iter++) { const uint block_index = indices[block_iter]; //const color_quad_u8* pSrc_pixels = &m_pBlocks[block_index].m_pixels[0][0]; const color_quad_u8* pSrc_pixels = (const color_quad_u8*)m_pBlocks[block_index].m_pixels; for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) { const color_quad_u8& src = pSrc_pixels[i]; if (src.a < m_params.m_dxt1a_alpha_threshold) has_alpha_pixels = true; *pDst++ = src; } } p.m_block_index = cluster_index; p.m_num_pixels = cluster_pixels.size(); p.m_pPixels = cluster_pixels.begin(); r.m_pSelectors = selectors.begin(); uint low_color, high_color; if ((m_params.m_dxt_quality != cCRNDXTQualitySuperFast) || (has_alpha_pixels)) { p.m_pixels_have_alpha = has_alpha_pixels; optimizer.compute(p, r); low_color = r.m_low_color; high_color = r.m_high_color; } else { dxt_fast::compress_color_block(cluster_pixels.size(), cluster_pixels.begin(), low_color, high_color, selectors.begin(), true); } const uint8* pSrc_selectors = selectors.begin(); for (uint block_iter = 0; block_iter < indices.size(); block_iter++) { const uint block_index = indices[block_iter]; dxt1_block& dxt_block = get_block(block_index); dxt_block.set_low_color(static_cast(low_color)); dxt_block.set_high_color(static_cast(high_color)); uint mask = 0; for (int i = 15; i >= 0; i--) { mask <<= 2; mask |= pSrc_selectors[i]; } pSrc_selectors += (cDXTBlockSize * cDXTBlockSize); dxt_block.m_selectors[0] = static_cast(mask & 0xFF); dxt_block.m_selectors[1] = static_cast((mask >> 8) & 0xFF); dxt_block.m_selectors[2] = static_cast((mask >> 16) & 0xFF); dxt_block.m_selectors[3] = static_cast((mask >> 24) & 0xFF); } { scoped_spinlock lock(m_cluster_hash_lock); m_cluster_hash.insert(cid, low_color | (high_color << 16)); } } } } struct optimize_selectors_params { CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(optimize_selectors_params); optimize_selectors_params( crnlib::vector< crnlib::vector >& selector_cluster_indices) : m_selector_cluster_indices(selector_cluster_indices) { } crnlib::vector< crnlib::vector >& m_selector_cluster_indices; }; void qdxt1::optimize_selectors_task(uint64 data, void* pData_ptr) { const uint thread_index = static_cast(data); optimize_selectors_params& task_params = *static_cast(pData_ptr); crnlib::vector block_categories[2]; block_categories[0].reserve(2048); block_categories[1].reserve(2048); for (uint cluster_index = 0; cluster_index < task_params.m_selector_cluster_indices.size(); cluster_index++) { if (m_canceled) return; if ((cluster_index & 255) == 0) { if (crn_get_current_thread_id() == m_main_thread_id) { if (!update_progress(cluster_index, task_params.m_selector_cluster_indices.size() - 1)) return; } } if (m_pTask_pool->get_num_threads()) { if ((cluster_index % (m_pTask_pool->get_num_threads() + 1)) != thread_index) continue; } const crnlib::vector& selector_indices = task_params.m_selector_cluster_indices[cluster_index]; if (selector_indices.size() <= 1) continue; block_categories[0].resize(0); block_categories[1].resize(0); for (uint block_iter = 0; block_iter < selector_indices.size(); block_iter++) { const uint block_index = selector_indices[block_iter]; const dxt1_block& src_block = get_block(block_index); if (!src_block.is_alpha_block()) block_categories[0].push_back(block_index); else { bool has_alpha_pixels = false; if (m_params.m_dxt1a_alpha_threshold > 0) { const color_quad_u8* pSrc_pixels = (const color_quad_u8*)m_pBlocks[block_index].m_pixels; for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++) { const color_quad_u8& src = pSrc_pixels[i]; if (src.a < m_params.m_dxt1a_alpha_threshold) { has_alpha_pixels = true; break; } } } if (has_alpha_pixels) continue; block_categories[1].push_back(block_index); } } dxt1_block blk; utils::zero_object(blk); for (uint block_type = 0; block_type <= 1; block_type++) { const crnlib::vector& block_indices = block_categories[block_type]; if (block_indices.size() <= 1) continue; for (uint y = 0; y < 4; y++) { for (uint x = 0; x < 4; x++) { uint best_s = 0; uint64 best_error = 0xFFFFFFFFFFULL; uint max_s = 4; if (block_type == 1) max_s = 3; for (uint s = 0; s < max_s; s++) { uint64 total_error = 0; for (uint block_iter = 0; block_iter < block_indices.size(); block_iter++) { const uint block_index = block_indices[block_iter]; const color_quad_u8& orig_color = m_pBlocks[block_index].m_pixels[y][x]; const dxt1_block& dst_block = get_block(block_index); color_quad_u8 colors[4]; dxt1_block::get_block_colors(colors, static_cast(dst_block.get_low_color()), static_cast(dst_block.get_high_color())); uint error = color::color_distance(m_params.m_perceptual, orig_color, colors[s], false); total_error += error; } if (total_error < best_error) { best_error = total_error; best_s = s; } } blk.set_selector(x, y, best_s); } // x } // y for (uint block_iter = 0; block_iter < block_indices.size(); block_iter++) { const uint block_index = block_indices[block_iter]; dxt1_block& dst_block = get_block(block_index); memcpy(dst_block.m_selectors, blk.m_selectors, sizeof(dst_block.m_selectors)); } } } // cluster_index } bool qdxt1::generate_codebook_progress_callback(uint percentage_completed, void* pData) { return static_cast(pData)->update_progress(percentage_completed, 100U); } bool qdxt1::create_selector_clusters(uint max_selector_clusters, crnlib::vector< crnlib::vector >& selector_cluster_indices) { m_progress_start = m_progress_range; m_progress_range = 33; weighted_selector_vec_array selector_vecs(m_num_blocks); for (uint block_iter = 0; block_iter < m_num_blocks; block_iter++) { dxt1_block& dxt1_block = get_block(block_iter); vec16F sv; float* pDst = &sv[0]; for (uint y = 0; y < 4; y++) for (uint x = 0; x < 4; x++) *pDst++ = g_dxt1_to_linear[dxt1_block.get_selector(x, y)]; const color_quad_u8 first_color(dxt1_block::unpack_color((uint16)dxt1_block.get_low_color(), true)); const color_quad_u8 second_color(dxt1_block::unpack_color((uint16)dxt1_block.get_high_color(), true)); const uint dist = color::color_distance(m_params.m_perceptual, first_color, second_color, false); const uint cColorDistToWeight = 2000; const uint cMaxWeight = 2048; uint weight = math::clamp(dist / cColorDistToWeight, 1, cMaxWeight); selector_vecs[block_iter].m_vec = sv; selector_vecs[block_iter].m_weight = weight; } return m_selector_clusterizer.create_clusters( selector_vecs, max_selector_clusters, selector_cluster_indices, generate_codebook_progress_callback, this); } bool qdxt1::pack(dxt1_block* pDst_elements, uint elements_per_block, const qdxt1_params& params, float quality_power_mul) { CRNLIB_ASSERT(m_num_blocks); m_main_thread_id = crn_get_current_thread_id(); m_canceled = false; m_pDst_elements = pDst_elements; m_elements_per_block = elements_per_block; m_params = params; if (!m_params.m_use_alpha_blocks) m_params.m_dxt1a_alpha_threshold = 0; m_prev_percentage_complete = -1; CRNLIB_ASSERT(m_params.m_quality_level <= qdxt1_params::cMaxQuality); const float quality = m_params.m_quality_level / (float)qdxt1_params::cMaxQuality; const float endpoint_quality = powf(quality, 1.8f * quality_power_mul); const float selector_quality = powf(quality, 1.65f * quality_power_mul); //const uint max_endpoint_clusters = math::clamp(static_cast(m_endpoint_clusterizer.get_codebook_size() * endpoint_quality), 128U, m_endpoint_clusterizer.get_codebook_size()); //const uint max_selector_clusters = math::clamp(static_cast(m_max_selector_clusters * selector_quality), 150U, m_max_selector_clusters); const uint max_endpoint_clusters = math::clamp(static_cast(m_endpoint_clusterizer.get_codebook_size() * endpoint_quality), 96U, m_endpoint_clusterizer.get_codebook_size()); const uint max_selector_clusters = math::clamp(static_cast(m_max_selector_clusters * selector_quality), 128U, m_max_selector_clusters); if (quality >= 1.0f) { m_endpoint_cluster_indices.resize(m_num_blocks); for (uint i = 0; i < m_num_blocks; i++) { m_endpoint_cluster_indices[i].resize(1); m_endpoint_cluster_indices[i][0] = i; } } else m_endpoint_clusterizer.retrieve_clusters(max_endpoint_clusters, m_endpoint_cluster_indices); // trace("endpoint clusters: %u\n", m_endpoint_cluster_indices.size()); uint total_blocks = 0; uint max_blocks = 0; for (uint i = 0; i < m_endpoint_cluster_indices.size(); i++) { uint num = m_endpoint_cluster_indices[i].size(); total_blocks += num; max_blocks = math::maximum(max_blocks, num); } #if 0 trace("Num clusters: %u, Average blocks per cluster: %u, Max blocks per cluster: %u\n", m_endpoint_cluster_indices.size(), total_blocks / m_endpoint_cluster_indices.size(), max_blocks); #endif crnlib::vector< crnlib::vector >& selector_cluster_indices = m_cached_selector_cluster_indices[params.m_quality_level]; m_progress_start = 0; if (quality >= 1.0f) m_progress_range = 100; else if (selector_cluster_indices.empty()) m_progress_range = (m_params.m_dxt_quality == cCRNDXTQualitySuperFast) ? 10 : 33; else m_progress_range = (m_params.m_dxt_quality == cCRNDXTQualitySuperFast) ? 10 : 50; for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++) m_pTask_pool->queue_object_task(this, &qdxt1::pack_endpoints_task, i); m_pTask_pool->join(); if (m_canceled) return false; if (quality >= 1.0f) return true; if (selector_cluster_indices.empty()) { create_selector_clusters(max_selector_clusters, selector_cluster_indices); if (m_canceled) { selector_cluster_indices.clear(); return false; } } m_progress_start += m_progress_range; m_progress_range = 100 - m_progress_start; optimize_selectors_params optimize_selectors_task_params(selector_cluster_indices); for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++) m_pTask_pool->queue_object_task(this, &qdxt1::optimize_selectors_task, i, &optimize_selectors_task_params); m_pTask_pool->join(); return !m_canceled; } } // namespace crnlib