911 lines
33 KiB
C++
911 lines
33 KiB
C++
|
// File: crn_qdxt.cpp
|
||
|
// See Copyright Notice and license at the end of inc/crnlib.h
|
||
|
#include "crn_core.h"
|
||
|
#include "crn_qdxt1.h"
|
||
|
#include "crn_dxt1.h"
|
||
|
#include "crn_dxt_fast.h"
|
||
|
#include "crn_image_utils.h"
|
||
|
#include "crn_dxt_hc_common.h"
|
||
|
|
||
|
#define GENERATE_DEBUG_IMAGES 0
|
||
|
|
||
|
namespace crnlib
|
||
|
{
|
||
|
qdxt1::qdxt1(task_pool& task_pool) :
|
||
|
m_pTask_pool(&task_pool),
|
||
|
m_main_thread_id(0),
|
||
|
m_canceled(false),
|
||
|
m_progress_start(0),
|
||
|
m_progress_range(100),
|
||
|
m_num_blocks(0),
|
||
|
m_pBlocks(NULL),
|
||
|
m_pDst_elements(NULL),
|
||
|
m_elements_per_block(0),
|
||
|
m_max_selector_clusters(0),
|
||
|
m_prev_percentage_complete(-1),
|
||
|
m_selector_clusterizer(task_pool)
|
||
|
{
|
||
|
}
|
||
|
|
||
|
qdxt1::~qdxt1()
|
||
|
{
|
||
|
}
|
||
|
|
||
|
void qdxt1::clear()
|
||
|
{
|
||
|
m_main_thread_id = 0;
|
||
|
m_num_blocks = 0;
|
||
|
m_pBlocks = 0;
|
||
|
m_pDst_elements = NULL;
|
||
|
m_elements_per_block = 0;
|
||
|
m_params.clear();
|
||
|
m_endpoint_clusterizer.clear();
|
||
|
m_endpoint_cluster_indices.clear();
|
||
|
m_max_selector_clusters = 0;
|
||
|
m_canceled = false;
|
||
|
m_progress_start = 0;
|
||
|
m_progress_range = 100;
|
||
|
m_selector_clusterizer.clear();
|
||
|
|
||
|
for (uint i = 0; i <= qdxt1_params::cMaxQuality; i++)
|
||
|
m_cached_selector_cluster_indices[i].clear();
|
||
|
|
||
|
m_cluster_hash.clear();
|
||
|
|
||
|
m_prev_percentage_complete = -1;
|
||
|
}
|
||
|
|
||
|
bool qdxt1::init(uint n, const dxt_pixel_block* pBlocks, const qdxt1_params& params)
|
||
|
{
|
||
|
clear();
|
||
|
|
||
|
CRNLIB_ASSERT(n && pBlocks);
|
||
|
|
||
|
m_main_thread_id = crn_get_current_thread_id();
|
||
|
|
||
|
m_num_blocks = n;
|
||
|
m_pBlocks = pBlocks;
|
||
|
m_params = params;
|
||
|
|
||
|
m_endpoint_clusterizer.reserve_training_vecs(m_num_blocks);
|
||
|
|
||
|
m_progress_start = 0;
|
||
|
m_progress_range = 75;
|
||
|
|
||
|
const bool debugging = false;
|
||
|
image_u8 debug_img;
|
||
|
|
||
|
if ((m_params.m_hierarchical) && (m_params.m_num_mips))
|
||
|
{
|
||
|
vec6F_clusterizer::training_vec_array& training_vecs = m_endpoint_clusterizer.get_training_vecs();
|
||
|
training_vecs.resize(m_num_blocks);
|
||
|
|
||
|
uint encoding_hist[cNumChunkEncodings];
|
||
|
utils::zero_object(encoding_hist);
|
||
|
|
||
|
uint total_processed_blocks = 0;
|
||
|
uint next_progress_threshold = 512;
|
||
|
|
||
|
for (uint level = 0; level < m_params.m_num_mips; level++)
|
||
|
{
|
||
|
const qdxt1_params::mip_desc& level_desc = m_params.m_mip_desc[level];
|
||
|
|
||
|
const uint num_chunks_x = (level_desc.m_block_width + cChunkBlockWidth - 1) / cChunkBlockWidth;
|
||
|
const uint num_chunks_y = (level_desc.m_block_height + cChunkBlockHeight - 1) / cChunkBlockHeight;
|
||
|
|
||
|
const uint level_width = level_desc.m_block_width * 4;
|
||
|
const uint level_height = level_desc.m_block_height * 4;
|
||
|
|
||
|
if (debugging)
|
||
|
debug_img.resize(num_chunks_x * cChunkPixelWidth, num_chunks_y * cChunkPixelHeight);
|
||
|
|
||
|
float adaptive_tile_color_psnr_derating = 1.5f; // was 2.4f
|
||
|
if ((level) && (adaptive_tile_color_psnr_derating > .25f))
|
||
|
{
|
||
|
adaptive_tile_color_psnr_derating = math::maximum(.25f, adaptive_tile_color_psnr_derating / powf(3.1f, static_cast<float>(level))); // was 3.0f
|
||
|
}
|
||
|
for (uint chunk_y = 0; chunk_y < num_chunks_y; chunk_y++)
|
||
|
{
|
||
|
for (uint chunk_x = 0; chunk_x < num_chunks_x; chunk_x++)
|
||
|
{
|
||
|
color_quad_u8 chunk_pixels[cChunkPixelWidth * cChunkPixelHeight];
|
||
|
|
||
|
for (uint y = 0; y < cChunkPixelHeight; y++)
|
||
|
{
|
||
|
const uint pix_y = math::minimum<uint>(chunk_y * cChunkPixelHeight + y, level_height - 1);
|
||
|
|
||
|
const uint outer_block_index = level_desc.m_first_block + ((pix_y >> 2) * level_desc.m_block_width);
|
||
|
|
||
|
for (uint x = 0; x < cChunkPixelWidth; x++)
|
||
|
{
|
||
|
const uint pix_x = math::minimum<uint>(chunk_x * cChunkPixelWidth + x, level_width - 1);
|
||
|
|
||
|
const uint block_index = outer_block_index + (pix_x >> 2);
|
||
|
|
||
|
const dxt_pixel_block& block = m_pBlocks[block_index];
|
||
|
|
||
|
const color_quad_u8& p = block.m_pixels[pix_y & 3][pix_x & 3];
|
||
|
|
||
|
chunk_pixels[x + y * 8] = p;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
struct layout_results
|
||
|
{
|
||
|
uint m_low_color;
|
||
|
uint m_high_color;
|
||
|
uint8 m_selectors[cChunkPixelWidth * cChunkPixelHeight];
|
||
|
uint64 m_error;
|
||
|
//float m_penalty;
|
||
|
};
|
||
|
layout_results layouts[cNumChunkTileLayouts];
|
||
|
|
||
|
for (uint l = 0; l < cNumChunkTileLayouts; l++)
|
||
|
{
|
||
|
const uint width = g_chunk_tile_layouts[l].m_width;
|
||
|
const uint height = g_chunk_tile_layouts[l].m_height;
|
||
|
const uint x_ofs = g_chunk_tile_layouts[l].m_x_ofs;
|
||
|
const uint y_ofs = g_chunk_tile_layouts[l].m_y_ofs;
|
||
|
|
||
|
color_quad_u8 layout_pixels[cChunkPixelWidth * cChunkPixelHeight];
|
||
|
for (uint y = 0; y < height; y++)
|
||
|
for (uint x = 0; x < width; x++)
|
||
|
layout_pixels[x + y * width] = chunk_pixels[(x_ofs + x) + (y_ofs + y) * cChunkPixelWidth];
|
||
|
|
||
|
const uint n = width * height;
|
||
|
dxt_fast::compress_color_block(n, layout_pixels, layouts[l].m_low_color, layouts[l].m_high_color, layouts[l].m_selectors);
|
||
|
|
||
|
color_quad_u8 c[4];
|
||
|
dxt1_block::get_block_colors(c, static_cast<uint16>(layouts[l].m_low_color), static_cast<uint16>(layouts[l].m_high_color));
|
||
|
|
||
|
uint64 error = 0;
|
||
|
for (uint i = 0; i < n; i++)
|
||
|
error += color::elucidian_distance(layout_pixels[i], c[layouts[l].m_selectors[i]], false);
|
||
|
|
||
|
layouts[l].m_error = error;
|
||
|
|
||
|
#if 0
|
||
|
if ((width > 4) || (height > 4))
|
||
|
{
|
||
|
const uint dist = color::elucidian_distance(
|
||
|
dxt1_block::unpack_color(static_cast<uint16>(layouts[l].m_low_color), true),
|
||
|
dxt1_block::unpack_color(static_cast<uint16>(layouts[l].m_high_color), true), false);
|
||
|
|
||
|
layouts[l].m_penalty = math::clamp((sqrt((float)dist) - 75.0f) / 150.0f, 0.0f, 2.0f);
|
||
|
if ((width == 8) && (height == 8))
|
||
|
layouts[l].m_penalty *= 2.0f;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
layouts[l].m_penalty = 0.0f;
|
||
|
}
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
double best_peak_snr = -1.0f;
|
||
|
uint best_encoding = 0;
|
||
|
|
||
|
for (uint e = 0; e < cNumChunkEncodings; e++)
|
||
|
{
|
||
|
const chunk_encoding_desc& encoding_desc = g_chunk_encodings[e];
|
||
|
|
||
|
double total_error = 0;
|
||
|
|
||
|
for (uint t = 0; t < encoding_desc.m_num_tiles; t++)
|
||
|
total_error += (double)layouts[encoding_desc.m_tiles[t].m_layout_index].m_error;
|
||
|
|
||
|
//double mean_squared = total_error * (1.0f / (16.0f * 3.0f));
|
||
|
double mean_squared = total_error * (1.0f / (64.0f * 3.0f));
|
||
|
double root_mean_squared = sqrt(mean_squared);
|
||
|
|
||
|
double peak_snr = 999999.0f;
|
||
|
if (mean_squared)
|
||
|
peak_snr = math::clamp<double>(log10(255.0f / root_mean_squared) * 20.0f, 0.0f, 500.0f);
|
||
|
|
||
|
//if (level)
|
||
|
// adaptive_tile_color_psnr_derating = math::lerp(adaptive_tile_color_psnr_derating * .5f, .3f, math::maximum((level - 1) / float(m_params.m_num_mips - 2), 1.0f));
|
||
|
|
||
|
float color_derating = math::lerp( 0.0f, adaptive_tile_color_psnr_derating, (g_chunk_encodings[e].m_num_tiles - 1) / 3.0f );
|
||
|
peak_snr = peak_snr - color_derating;
|
||
|
|
||
|
//for (uint t = 0; t < encoding_desc.m_num_tiles; t++)
|
||
|
// peak_snr -= (double)layouts[encoding_desc.m_tiles[t].m_layout_index].m_penalty;
|
||
|
|
||
|
if (peak_snr > best_peak_snr)
|
||
|
{
|
||
|
best_peak_snr = peak_snr;
|
||
|
best_encoding = e;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
encoding_hist[best_encoding]++;
|
||
|
|
||
|
const chunk_encoding_desc& encoding_desc = g_chunk_encodings[best_encoding];
|
||
|
|
||
|
for (uint t = 0; t < encoding_desc.m_num_tiles; t++)
|
||
|
{
|
||
|
const chunk_tile_desc& tile_desc = encoding_desc.m_tiles[t];
|
||
|
|
||
|
uint layout_index = tile_desc.m_layout_index;
|
||
|
const layout_results& layout = layouts[layout_index];
|
||
|
color_quad_u8 c[4];
|
||
|
if (debugging)
|
||
|
dxt1_block::get_block_colors(c, static_cast<uint16>(layout.m_low_color), static_cast<uint16>(layout.m_high_color));
|
||
|
|
||
|
color_quad_u8 tile_pixels[cChunkPixelWidth * cChunkPixelHeight];
|
||
|
|
||
|
for (uint y = 0; y < tile_desc.m_height; y++)
|
||
|
{
|
||
|
const uint pix_y = y + tile_desc.m_y_ofs;
|
||
|
|
||
|
for (uint x = 0; x < tile_desc.m_width; x++)
|
||
|
{
|
||
|
const uint pix_x = x + tile_desc.m_x_ofs;
|
||
|
|
||
|
tile_pixels[x + y * tile_desc.m_width] = chunk_pixels[pix_x + pix_y * cChunkPixelWidth];
|
||
|
|
||
|
if (debugging)
|
||
|
debug_img(chunk_x * 8 + pix_x, chunk_y * 8 + pix_y) = c[layout.m_selectors[x + y * tile_desc.m_width]];
|
||
|
}
|
||
|
}
|
||
|
|
||
|
color_quad_u8 l, h;
|
||
|
dxt_fast::find_representative_colors(tile_desc.m_width * tile_desc.m_height, tile_pixels, l, h);
|
||
|
|
||
|
//const uint dist = color::color_distance(m_params.m_perceptual, l, h, false);
|
||
|
const uint dist = color::elucidian_distance(l, h, false);
|
||
|
|
||
|
const uint cColorDistToWeight = 5000;
|
||
|
const uint cMaxWeight = 8;
|
||
|
uint weight = math::clamp<uint>(dist / cColorDistToWeight, 1, cMaxWeight);
|
||
|
|
||
|
vec6F ev;
|
||
|
|
||
|
ev[0] = l[0]; ev[1] = l[1]; ev[2] = l[2];
|
||
|
ev[3] = h[0]; ev[4] = h[1]; ev[5] = h[2];
|
||
|
|
||
|
for (uint y = 0; y < (tile_desc.m_height >> 2); y++)
|
||
|
{
|
||
|
uint block_y = chunk_y * cChunkBlockHeight + y + (tile_desc.m_y_ofs >> 2);
|
||
|
if (block_y >= level_desc.m_block_height)
|
||
|
continue;
|
||
|
|
||
|
for (uint x = 0; x < (tile_desc.m_width >> 2); x++)
|
||
|
{
|
||
|
uint block_x = chunk_x * cChunkBlockWidth + x + (tile_desc.m_x_ofs >> 2);
|
||
|
if (block_x >= level_desc.m_block_width)
|
||
|
break;
|
||
|
|
||
|
uint block_index = level_desc.m_first_block + block_x + block_y * level_desc.m_block_width;
|
||
|
|
||
|
training_vecs[block_index].first = ev;
|
||
|
training_vecs[block_index].second = weight;
|
||
|
|
||
|
total_processed_blocks++;
|
||
|
|
||
|
//if (debugging)
|
||
|
//{
|
||
|
// debug_img(block_x, block_y) = l;
|
||
|
// debug_img(block_x + level_desc.m_block_width, block_y) = h;
|
||
|
//}
|
||
|
|
||
|
} // x
|
||
|
} // y
|
||
|
} //t
|
||
|
|
||
|
if (total_processed_blocks >= next_progress_threshold)
|
||
|
{
|
||
|
next_progress_threshold += 512;
|
||
|
|
||
|
if (!update_progress(total_processed_blocks, m_num_blocks - 1))
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
} // chunk_x
|
||
|
} // chunk_y
|
||
|
|
||
|
#if GENERATE_DEBUG_IMAGES
|
||
|
if (debugging)
|
||
|
image_utils::write_to_file(dynamic_string(cVarArg, "debug_%u.tga", level).get_ptr(), debug_img, image_utils::cWriteFlagIgnoreAlpha);
|
||
|
#endif
|
||
|
|
||
|
} // level
|
||
|
|
||
|
#if 0
|
||
|
trace("chunk encoding hist: ");
|
||
|
for (uint i = 0; i < cNumChunkEncodings; i++)
|
||
|
trace("%u ", encoding_hist[i]);
|
||
|
trace("\n");
|
||
|
#endif
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
for (uint block_index = 0; block_index < m_num_blocks; block_index++)
|
||
|
{
|
||
|
if ((block_index & 511) == 0)
|
||
|
{
|
||
|
if (!update_progress(block_index, m_num_blocks - 1))
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
color_quad_u8 l, h;
|
||
|
dxt_fast::find_representative_colors(cDXTBlockSize * cDXTBlockSize, &m_pBlocks[block_index].m_pixels[0][0], l, h);
|
||
|
|
||
|
//const uint dist = color::color_distance(m_params.m_perceptual, l, h, false);
|
||
|
const uint dist = color::elucidian_distance(l, h, false);
|
||
|
|
||
|
const uint cColorDistToWeight = 5000;
|
||
|
const uint cMaxWeight = 8;
|
||
|
uint weight = math::clamp<uint>(dist / cColorDistToWeight, 1, cMaxWeight);
|
||
|
|
||
|
vec6F ev;
|
||
|
|
||
|
ev[0] = l[0]; ev[1] = l[1]; ev[2] = l[2];
|
||
|
ev[3] = h[0]; ev[4] = h[1]; ev[5] = h[2];
|
||
|
|
||
|
m_endpoint_clusterizer.add_training_vec(ev, weight);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
const uint cMaxEndpointClusters = 65535U;
|
||
|
|
||
|
m_progress_start = 75;
|
||
|
m_progress_range = 20;
|
||
|
|
||
|
if (!m_endpoint_clusterizer.generate_codebook(cMaxEndpointClusters, generate_codebook_progress_callback, this))
|
||
|
return false;
|
||
|
|
||
|
crnlib::hash_map<uint, empty_type> selector_hash;
|
||
|
|
||
|
m_progress_start = 95;
|
||
|
m_progress_range = 5;
|
||
|
|
||
|
for (uint block_index = 0; block_index < m_num_blocks; block_index++)
|
||
|
{
|
||
|
if ((block_index & 511) == 0)
|
||
|
{
|
||
|
if (!update_progress(block_index, m_num_blocks - 1))
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
dxt1_block dxt_blk;
|
||
|
dxt_fast::compress_color_block(&dxt_blk, &m_pBlocks[block_index].m_pixels[0][0]);
|
||
|
|
||
|
uint selectors = dxt_blk.m_selectors[0] | (dxt_blk.m_selectors[1] << 8) | (dxt_blk.m_selectors[2] << 16) | (dxt_blk.m_selectors[3] << 24);
|
||
|
|
||
|
selector_hash.insert(selectors);
|
||
|
}
|
||
|
|
||
|
m_max_selector_clusters = selector_hash.size() + 128;
|
||
|
|
||
|
// trace("max endpoint clusters: %u\n", m_endpoint_clusterizer.get_codebook_size());
|
||
|
// trace("max selector clusters: %u\n", m_max_selector_clusters);
|
||
|
|
||
|
update_progress(1, 1);
|
||
|
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
bool qdxt1::update_progress(uint value, uint max_value)
|
||
|
{
|
||
|
if (!m_params.m_pProgress_func)
|
||
|
return true;
|
||
|
|
||
|
uint percentage = max_value ? (m_progress_start + (value * m_progress_range + (max_value / 2)) / max_value) : 100;
|
||
|
if ((int)percentage == m_prev_percentage_complete)
|
||
|
return true;
|
||
|
m_prev_percentage_complete = percentage;
|
||
|
|
||
|
if (!m_params.m_pProgress_func(m_params.m_progress_start + (percentage * m_params.m_progress_range) / 100U, m_params.m_pProgress_data))
|
||
|
{
|
||
|
m_canceled = true;
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
void qdxt1::pack_endpoints_task(uint64 data, void* pData_ptr)
|
||
|
{
|
||
|
pData_ptr;
|
||
|
const uint thread_index = static_cast<uint>(data);
|
||
|
|
||
|
crnlib::vector<color_quad_u8> cluster_pixels;
|
||
|
cluster_pixels.reserve(1024);
|
||
|
|
||
|
crnlib::vector<uint8> selectors;
|
||
|
selectors.reserve(1024);
|
||
|
|
||
|
dxt1_endpoint_optimizer optimizer;
|
||
|
dxt1_endpoint_optimizer::params p;
|
||
|
dxt1_endpoint_optimizer::results r;
|
||
|
|
||
|
p.m_quality = m_params.m_dxt_quality;
|
||
|
p.m_use_alpha_blocks = m_params.m_use_alpha_blocks;
|
||
|
p.m_dxt1a_alpha_threshold = m_params.m_dxt1a_alpha_threshold;
|
||
|
p.m_perceptual = m_params.m_perceptual;
|
||
|
|
||
|
uint cluster_index_progress_mask = math::next_pow2(m_endpoint_cluster_indices.size() / 100);
|
||
|
cluster_index_progress_mask /= 2;
|
||
|
cluster_index_progress_mask = math::maximum<uint>(cluster_index_progress_mask, 8);
|
||
|
cluster_index_progress_mask -= 1;
|
||
|
|
||
|
cluster_id cid;
|
||
|
const crnlib::vector<uint32>& indices = cid.m_cells;
|
||
|
|
||
|
for (uint cluster_index = 0; cluster_index < m_endpoint_cluster_indices.size(); cluster_index++)
|
||
|
{
|
||
|
if (m_canceled)
|
||
|
return;
|
||
|
|
||
|
if ((cluster_index & cluster_index_progress_mask) == 0)
|
||
|
{
|
||
|
if (crn_get_current_thread_id() == m_main_thread_id)
|
||
|
{
|
||
|
if (!update_progress(cluster_index, m_endpoint_cluster_indices.size() - 1))
|
||
|
return;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (m_pTask_pool->get_num_threads())
|
||
|
{
|
||
|
if ((cluster_index % (m_pTask_pool->get_num_threads() + 1)) != thread_index)
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
const crnlib::vector<uint>& cluster_indices = m_endpoint_cluster_indices[cluster_index];
|
||
|
|
||
|
selectors.resize(cluster_indices.size() * cDXTBlockSize * cDXTBlockSize);
|
||
|
|
||
|
bool found = false;
|
||
|
uint32 found_endpoints = 0;
|
||
|
|
||
|
cid.set(cluster_indices);
|
||
|
|
||
|
{
|
||
|
scoped_spinlock lock(m_cluster_hash_lock);
|
||
|
|
||
|
cluster_hash::const_iterator it(m_cluster_hash.find(cid));
|
||
|
if (it != m_cluster_hash.end())
|
||
|
{
|
||
|
CRNLIB_ASSERT(cid == it->first);
|
||
|
|
||
|
found = true;
|
||
|
found_endpoints = it->second;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (found)
|
||
|
{
|
||
|
const uint16 low_color = static_cast<uint16>(found_endpoints);
|
||
|
const uint16 high_color = static_cast<uint16>((found_endpoints >> 16U));
|
||
|
|
||
|
color_quad_u8 block_colors[4];
|
||
|
dxt1_block::get_block_colors(block_colors, low_color, high_color);
|
||
|
|
||
|
const bool is_alpha_block = (low_color <= high_color);
|
||
|
|
||
|
for (uint block_iter = 0; block_iter < indices.size(); block_iter++)
|
||
|
{
|
||
|
const uint block_index = indices[block_iter];
|
||
|
|
||
|
const color_quad_u8* pSrc_pixels = &m_pBlocks[block_index].m_pixels[0][0];
|
||
|
|
||
|
for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++)
|
||
|
{
|
||
|
dxt1_block& dxt_block = get_block(block_index);
|
||
|
|
||
|
dxt_block.set_low_color(static_cast<uint16>(low_color));
|
||
|
dxt_block.set_high_color(static_cast<uint16>(high_color));
|
||
|
|
||
|
uint mask = 0;
|
||
|
for (int i = 15; i >= 0; i--)
|
||
|
{
|
||
|
mask <<= 2;
|
||
|
|
||
|
const color_quad_u8& c = pSrc_pixels[i];
|
||
|
|
||
|
uint dist0 = color::color_distance(m_params.m_perceptual, c, block_colors[0], false);
|
||
|
uint dist1 = color::color_distance(m_params.m_perceptual, c, block_colors[1], false);
|
||
|
uint dist2 = color::color_distance(m_params.m_perceptual, c, block_colors[2], false);
|
||
|
|
||
|
uint selector = 0, best_dist = dist0;
|
||
|
|
||
|
if (dist1 < best_dist) { selector = 1; best_dist = dist1; }
|
||
|
if (dist2 < best_dist) { selector = 2; best_dist = dist2; }
|
||
|
|
||
|
if (!is_alpha_block)
|
||
|
{
|
||
|
uint dist3 = color::color_distance(m_params.m_perceptual, c, block_colors[3], false);
|
||
|
if (dist3 < best_dist) { selector = 3; }
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
if (c.a < m_params.m_dxt1a_alpha_threshold)
|
||
|
selector = 3;
|
||
|
}
|
||
|
|
||
|
mask |= selector;
|
||
|
}
|
||
|
|
||
|
dxt_block.m_selectors[0] = static_cast<uint8>(mask & 0xFF);
|
||
|
dxt_block.m_selectors[1] = static_cast<uint8>((mask >> 8) & 0xFF);
|
||
|
dxt_block.m_selectors[2] = static_cast<uint8>((mask >> 16) & 0xFF);
|
||
|
dxt_block.m_selectors[3] = static_cast<uint8>((mask >> 24) & 0xFF);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
cluster_pixels.resize(indices.size() * cDXTBlockSize * cDXTBlockSize);
|
||
|
|
||
|
color_quad_u8* pDst = &cluster_pixels[0];
|
||
|
|
||
|
bool has_alpha_pixels = false;
|
||
|
|
||
|
for (uint block_iter = 0; block_iter < indices.size(); block_iter++)
|
||
|
{
|
||
|
const uint block_index = indices[block_iter];
|
||
|
|
||
|
//const color_quad_u8* pSrc_pixels = &m_pBlocks[block_index].m_pixels[0][0];
|
||
|
const color_quad_u8* pSrc_pixels = (const color_quad_u8*)m_pBlocks[block_index].m_pixels;
|
||
|
|
||
|
for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++)
|
||
|
{
|
||
|
const color_quad_u8& src = pSrc_pixels[i];
|
||
|
|
||
|
if (src.a < m_params.m_dxt1a_alpha_threshold)
|
||
|
has_alpha_pixels = true;
|
||
|
|
||
|
*pDst++ = src;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
p.m_block_index = cluster_index;
|
||
|
p.m_num_pixels = cluster_pixels.size();
|
||
|
p.m_pPixels = cluster_pixels.begin();
|
||
|
|
||
|
r.m_pSelectors = selectors.begin();
|
||
|
|
||
|
uint low_color, high_color;
|
||
|
if ((m_params.m_dxt_quality != cCRNDXTQualitySuperFast) || (has_alpha_pixels))
|
||
|
{
|
||
|
p.m_pixels_have_alpha = has_alpha_pixels;
|
||
|
|
||
|
optimizer.compute(p, r);
|
||
|
low_color = r.m_low_color;
|
||
|
high_color = r.m_high_color;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
dxt_fast::compress_color_block(cluster_pixels.size(), cluster_pixels.begin(), low_color, high_color, selectors.begin(), true);
|
||
|
}
|
||
|
|
||
|
const uint8* pSrc_selectors = selectors.begin();
|
||
|
|
||
|
for (uint block_iter = 0; block_iter < indices.size(); block_iter++)
|
||
|
{
|
||
|
const uint block_index = indices[block_iter];
|
||
|
|
||
|
dxt1_block& dxt_block = get_block(block_index);
|
||
|
|
||
|
dxt_block.set_low_color(static_cast<uint16>(low_color));
|
||
|
dxt_block.set_high_color(static_cast<uint16>(high_color));
|
||
|
|
||
|
uint mask = 0;
|
||
|
for (int i = 15; i >= 0; i--)
|
||
|
{
|
||
|
mask <<= 2;
|
||
|
mask |= pSrc_selectors[i];
|
||
|
}
|
||
|
pSrc_selectors += (cDXTBlockSize * cDXTBlockSize);
|
||
|
|
||
|
dxt_block.m_selectors[0] = static_cast<uint8>(mask & 0xFF);
|
||
|
dxt_block.m_selectors[1] = static_cast<uint8>((mask >> 8) & 0xFF);
|
||
|
dxt_block.m_selectors[2] = static_cast<uint8>((mask >> 16) & 0xFF);
|
||
|
dxt_block.m_selectors[3] = static_cast<uint8>((mask >> 24) & 0xFF);
|
||
|
|
||
|
}
|
||
|
|
||
|
{
|
||
|
scoped_spinlock lock(m_cluster_hash_lock);
|
||
|
|
||
|
m_cluster_hash.insert(cid, low_color | (high_color << 16));
|
||
|
}
|
||
|
}
|
||
|
|
||
|
}
|
||
|
}
|
||
|
|
||
|
struct optimize_selectors_params
|
||
|
{
|
||
|
CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(optimize_selectors_params);
|
||
|
|
||
|
optimize_selectors_params(
|
||
|
crnlib::vector< crnlib::vector<uint> >& selector_cluster_indices) :
|
||
|
m_selector_cluster_indices(selector_cluster_indices)
|
||
|
{
|
||
|
}
|
||
|
|
||
|
crnlib::vector< crnlib::vector<uint> >& m_selector_cluster_indices;
|
||
|
};
|
||
|
|
||
|
void qdxt1::optimize_selectors_task(uint64 data, void* pData_ptr)
|
||
|
{
|
||
|
const uint thread_index = static_cast<uint>(data);
|
||
|
|
||
|
optimize_selectors_params& task_params = *static_cast<optimize_selectors_params*>(pData_ptr);
|
||
|
|
||
|
crnlib::vector<uint> block_categories[2];
|
||
|
block_categories[0].reserve(2048);
|
||
|
block_categories[1].reserve(2048);
|
||
|
|
||
|
for (uint cluster_index = 0; cluster_index < task_params.m_selector_cluster_indices.size(); cluster_index++)
|
||
|
{
|
||
|
if (m_canceled)
|
||
|
return;
|
||
|
|
||
|
if ((cluster_index & 255) == 0)
|
||
|
{
|
||
|
if (crn_get_current_thread_id() == m_main_thread_id)
|
||
|
{
|
||
|
if (!update_progress(cluster_index, task_params.m_selector_cluster_indices.size() - 1))
|
||
|
return;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (m_pTask_pool->get_num_threads())
|
||
|
{
|
||
|
if ((cluster_index % (m_pTask_pool->get_num_threads() + 1)) != thread_index)
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
const crnlib::vector<uint>& selector_indices = task_params.m_selector_cluster_indices[cluster_index];
|
||
|
|
||
|
if (selector_indices.size() <= 1)
|
||
|
continue;
|
||
|
|
||
|
block_categories[0].resize(0);
|
||
|
block_categories[1].resize(0);
|
||
|
|
||
|
for (uint block_iter = 0; block_iter < selector_indices.size(); block_iter++)
|
||
|
{
|
||
|
const uint block_index = selector_indices[block_iter];
|
||
|
|
||
|
const dxt1_block& src_block = get_block(block_index);
|
||
|
|
||
|
if (!src_block.is_alpha_block())
|
||
|
block_categories[0].push_back(block_index);
|
||
|
else
|
||
|
{
|
||
|
bool has_alpha_pixels = false;
|
||
|
|
||
|
if (m_params.m_dxt1a_alpha_threshold > 0)
|
||
|
{
|
||
|
const color_quad_u8* pSrc_pixels = (const color_quad_u8*)m_pBlocks[block_index].m_pixels;
|
||
|
|
||
|
for (uint i = 0; i < cDXTBlockSize * cDXTBlockSize; i++)
|
||
|
{
|
||
|
const color_quad_u8& src = pSrc_pixels[i];
|
||
|
if (src.a < m_params.m_dxt1a_alpha_threshold)
|
||
|
{
|
||
|
has_alpha_pixels = true;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (has_alpha_pixels)
|
||
|
continue;
|
||
|
|
||
|
block_categories[1].push_back(block_index);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
dxt1_block blk;
|
||
|
utils::zero_object(blk);
|
||
|
|
||
|
for (uint block_type = 0; block_type <= 1; block_type++)
|
||
|
{
|
||
|
const crnlib::vector<uint>& block_indices = block_categories[block_type];
|
||
|
if (block_indices.size() <= 1)
|
||
|
continue;
|
||
|
|
||
|
for (uint y = 0; y < 4; y++)
|
||
|
{
|
||
|
for (uint x = 0; x < 4; x++)
|
||
|
{
|
||
|
uint best_s = 0;
|
||
|
uint64 best_error = 0xFFFFFFFFFFULL;
|
||
|
|
||
|
uint max_s = 4;
|
||
|
if (block_type == 1)
|
||
|
max_s = 3;
|
||
|
|
||
|
for (uint s = 0; s < max_s; s++)
|
||
|
{
|
||
|
uint64 total_error = 0;
|
||
|
|
||
|
for (uint block_iter = 0; block_iter < block_indices.size(); block_iter++)
|
||
|
{
|
||
|
const uint block_index = block_indices[block_iter];
|
||
|
|
||
|
const color_quad_u8& orig_color = m_pBlocks[block_index].m_pixels[y][x];
|
||
|
|
||
|
const dxt1_block& dst_block = get_block(block_index);
|
||
|
|
||
|
color_quad_u8 colors[4];
|
||
|
dxt1_block::get_block_colors(colors, static_cast<uint16>(dst_block.get_low_color()), static_cast<uint16>(dst_block.get_high_color()));
|
||
|
|
||
|
uint error = color::color_distance(m_params.m_perceptual, orig_color, colors[s], false);
|
||
|
|
||
|
total_error += error;
|
||
|
}
|
||
|
|
||
|
if (total_error < best_error)
|
||
|
{
|
||
|
best_error = total_error;
|
||
|
best_s = s;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
blk.set_selector(x, y, best_s);
|
||
|
|
||
|
} // x
|
||
|
} // y
|
||
|
|
||
|
for (uint block_iter = 0; block_iter < block_indices.size(); block_iter++)
|
||
|
{
|
||
|
const uint block_index = block_indices[block_iter];
|
||
|
|
||
|
dxt1_block& dst_block = get_block(block_index);
|
||
|
|
||
|
memcpy(dst_block.m_selectors, blk.m_selectors, sizeof(dst_block.m_selectors));
|
||
|
}
|
||
|
}
|
||
|
|
||
|
} // cluster_index
|
||
|
}
|
||
|
|
||
|
bool qdxt1::generate_codebook_progress_callback(uint percentage_completed, void* pData)
|
||
|
{
|
||
|
return static_cast<qdxt1*>(pData)->update_progress(percentage_completed, 100U);
|
||
|
}
|
||
|
|
||
|
bool qdxt1::create_selector_clusters(uint max_selector_clusters, crnlib::vector< crnlib::vector<uint> >& selector_cluster_indices)
|
||
|
{
|
||
|
m_progress_start = m_progress_range;
|
||
|
m_progress_range = 33;
|
||
|
|
||
|
weighted_selector_vec_array selector_vecs(m_num_blocks);
|
||
|
|
||
|
for (uint block_iter = 0; block_iter < m_num_blocks; block_iter++)
|
||
|
{
|
||
|
dxt1_block& dxt1_block = get_block(block_iter);
|
||
|
|
||
|
vec16F sv;
|
||
|
float* pDst = &sv[0];
|
||
|
|
||
|
for (uint y = 0; y < 4; y++)
|
||
|
for (uint x = 0; x < 4; x++)
|
||
|
*pDst++ = g_dxt1_to_linear[dxt1_block.get_selector(x, y)];
|
||
|
|
||
|
const color_quad_u8 first_color(dxt1_block::unpack_color((uint16)dxt1_block.get_low_color(), true));
|
||
|
const color_quad_u8 second_color(dxt1_block::unpack_color((uint16)dxt1_block.get_high_color(), true));
|
||
|
const uint dist = color::color_distance(m_params.m_perceptual, first_color, second_color, false);
|
||
|
|
||
|
const uint cColorDistToWeight = 2000;
|
||
|
const uint cMaxWeight = 2048;
|
||
|
uint weight = math::clamp<uint>(dist / cColorDistToWeight, 1, cMaxWeight);
|
||
|
|
||
|
selector_vecs[block_iter].m_vec = sv;
|
||
|
selector_vecs[block_iter].m_weight = weight;
|
||
|
}
|
||
|
|
||
|
return m_selector_clusterizer.create_clusters(
|
||
|
selector_vecs, max_selector_clusters, selector_cluster_indices, generate_codebook_progress_callback, this);
|
||
|
}
|
||
|
|
||
|
bool qdxt1::pack(dxt1_block* pDst_elements, uint elements_per_block, const qdxt1_params& params, float quality_power_mul)
|
||
|
{
|
||
|
CRNLIB_ASSERT(m_num_blocks);
|
||
|
|
||
|
m_main_thread_id = crn_get_current_thread_id();
|
||
|
m_canceled = false;
|
||
|
|
||
|
m_pDst_elements = pDst_elements;
|
||
|
m_elements_per_block = elements_per_block;
|
||
|
m_params = params;
|
||
|
if (!m_params.m_use_alpha_blocks)
|
||
|
m_params.m_dxt1a_alpha_threshold = 0;
|
||
|
|
||
|
m_prev_percentage_complete = -1;
|
||
|
|
||
|
CRNLIB_ASSERT(m_params.m_quality_level <= qdxt1_params::cMaxQuality);
|
||
|
const float quality = m_params.m_quality_level / (float)qdxt1_params::cMaxQuality;
|
||
|
const float endpoint_quality = powf(quality, 1.8f * quality_power_mul);
|
||
|
const float selector_quality = powf(quality, 1.65f * quality_power_mul);
|
||
|
|
||
|
//const uint max_endpoint_clusters = math::clamp<uint>(static_cast<uint>(m_endpoint_clusterizer.get_codebook_size() * endpoint_quality), 128U, m_endpoint_clusterizer.get_codebook_size());
|
||
|
//const uint max_selector_clusters = math::clamp<uint>(static_cast<uint>(m_max_selector_clusters * selector_quality), 150U, m_max_selector_clusters);
|
||
|
const uint max_endpoint_clusters = math::clamp<uint>(static_cast<uint>(m_endpoint_clusterizer.get_codebook_size() * endpoint_quality), 96U, m_endpoint_clusterizer.get_codebook_size());
|
||
|
const uint max_selector_clusters = math::clamp<uint>(static_cast<uint>(m_max_selector_clusters * selector_quality), 128U, m_max_selector_clusters);
|
||
|
|
||
|
if (quality >= 1.0f)
|
||
|
{
|
||
|
m_endpoint_cluster_indices.resize(m_num_blocks);
|
||
|
for (uint i = 0; i < m_num_blocks; i++)
|
||
|
{
|
||
|
m_endpoint_cluster_indices[i].resize(1);
|
||
|
m_endpoint_cluster_indices[i][0] = i;
|
||
|
}
|
||
|
}
|
||
|
else
|
||
|
m_endpoint_clusterizer.retrieve_clusters(max_endpoint_clusters, m_endpoint_cluster_indices);
|
||
|
|
||
|
// trace("endpoint clusters: %u\n", m_endpoint_cluster_indices.size());
|
||
|
|
||
|
uint total_blocks = 0;
|
||
|
uint max_blocks = 0;
|
||
|
for (uint i = 0; i < m_endpoint_cluster_indices.size(); i++)
|
||
|
{
|
||
|
uint num = m_endpoint_cluster_indices[i].size();
|
||
|
total_blocks += num;
|
||
|
max_blocks = math::maximum(max_blocks, num);
|
||
|
}
|
||
|
#if 0
|
||
|
trace("Num clusters: %u, Average blocks per cluster: %u, Max blocks per cluster: %u\n",
|
||
|
m_endpoint_cluster_indices.size(),
|
||
|
total_blocks / m_endpoint_cluster_indices.size(),
|
||
|
max_blocks);
|
||
|
#endif
|
||
|
|
||
|
crnlib::vector< crnlib::vector<uint> >& selector_cluster_indices = m_cached_selector_cluster_indices[params.m_quality_level];
|
||
|
|
||
|
m_progress_start = 0;
|
||
|
if (quality >= 1.0f)
|
||
|
m_progress_range = 100;
|
||
|
else if (selector_cluster_indices.empty())
|
||
|
m_progress_range = (m_params.m_dxt_quality == cCRNDXTQualitySuperFast) ? 10 : 33;
|
||
|
else
|
||
|
m_progress_range = (m_params.m_dxt_quality == cCRNDXTQualitySuperFast) ? 10 : 50;
|
||
|
|
||
|
for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++)
|
||
|
m_pTask_pool->queue_object_task(this, &qdxt1::pack_endpoints_task, i);
|
||
|
m_pTask_pool->join();
|
||
|
|
||
|
if (m_canceled)
|
||
|
return false;
|
||
|
|
||
|
if (quality >= 1.0f)
|
||
|
return true;
|
||
|
|
||
|
if (selector_cluster_indices.empty())
|
||
|
{
|
||
|
create_selector_clusters(max_selector_clusters, selector_cluster_indices);
|
||
|
|
||
|
if (m_canceled)
|
||
|
{
|
||
|
selector_cluster_indices.clear();
|
||
|
|
||
|
return false;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
m_progress_start += m_progress_range;
|
||
|
m_progress_range = 100 - m_progress_start;
|
||
|
|
||
|
optimize_selectors_params optimize_selectors_task_params(selector_cluster_indices);
|
||
|
|
||
|
for (uint i = 0; i <= m_pTask_pool->get_num_threads(); i++)
|
||
|
m_pTask_pool->queue_object_task(this, &qdxt1::optimize_selectors_task, i, &optimize_selectors_task_params);
|
||
|
|
||
|
m_pTask_pool->join();
|
||
|
|
||
|
return !m_canceled;
|
||
|
}
|
||
|
|
||
|
} // namespace crnlib
|
||
|
|
||
|
|