// File: crn_threaded_clusterizer.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #include "crn_clusterizer.h" #include "crn_threading.h" namespace crnlib { template<typename VectorType> class threaded_clusterizer { CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(threaded_clusterizer); public: threaded_clusterizer(task_pool& tp) : m_pTask_pool(&tp), m_pProgress_callback(NULL), m_pProgress_callback_data(NULL), m_canceled(false) { } void clear() { for (uint i = 0; i < cMaxClusterizers; i++) m_clusterizers[i].clear(); } struct weighted_vec { weighted_vec() { } weighted_vec(const VectorType& v, uint w) : m_vec(v), m_weight(w) { } VectorType m_vec; uint m_weight; }; typedef crnlib::vector<weighted_vec> weighted_vec_array; typedef bool (*progress_callback_func)(uint percentage_completed, void* pProgress_data); bool create_clusters( const weighted_vec_array& weighted_vecs, uint max_clusters, crnlib::vector< crnlib::vector<uint> >& cluster_indices, progress_callback_func pProgress_callback, void* pProgress_callback_data) { m_main_thread_id = crn_get_current_thread_id(); m_canceled = false; m_pProgress_callback = pProgress_callback; m_pProgress_callback_data = pProgress_callback_data; if (max_clusters >= 128) { crnlib::vector<uint> primary_indices(weighted_vecs.size()); for (uint i = 0; i < weighted_vecs.size(); i++) primary_indices[i] = i; CRNLIB_ASSUME(cMaxClusterizers == 4); crnlib::vector<uint> indices[6]; compute_split(weighted_vecs, primary_indices, indices[0], indices[1]); compute_split(weighted_vecs, indices[0], indices[2], indices[3]); compute_split(weighted_vecs, indices[1], indices[4], indices[5]); create_clusters_task_state task_state[4]; m_cluster_task_displayed_progress = false; uint total_partitions = 0; for (uint i = 0; i < 4; i++) { const uint num_indices = indices[2 + i].size(); if (num_indices) total_partitions++; } for (uint i = 0; i < 4; i++) { const uint num_indices = indices[2 + i].size(); if (!num_indices) continue; task_state[i].m_pWeighted_vecs = &weighted_vecs; task_state[i].m_pIndices = &indices[2 + i]; task_state[i].m_max_clusters = (max_clusters + (total_partitions / 2)) / total_partitions; m_pTask_pool->queue_object_task(this, &threaded_clusterizer::create_clusters_task, i, &task_state[i]); } m_pTask_pool->join(); if (m_canceled) return false; uint total_clusters = 0; for (uint i = 0; i < 4; i++) total_clusters += task_state[i].m_cluster_indices.size(); cluster_indices.reserve(total_clusters); cluster_indices.resize(0); for (uint i = 0; i < 4; i++) { const uint ofs = cluster_indices.size(); cluster_indices.resize(ofs + task_state[i].m_cluster_indices.size()); for (uint j = 0; j < task_state[i].m_cluster_indices.size(); j++) { cluster_indices[ofs + j].swap( task_state[i].m_cluster_indices[j] ); } } } else { m_clusterizers[0].clear(); m_clusterizers[0].get_training_vecs().reserve(weighted_vecs.size()); for (uint i = 0; i < weighted_vecs.size(); i++) { const weighted_vec& v = weighted_vecs[i]; m_clusterizers[0].add_training_vec(v.m_vec, v.m_weight); } m_clusterizers[0].generate_codebook(max_clusters, generate_codebook_progress_callback, this, false);//m_params.m_dxt_quality <= cCRNDXTQualityFast); const uint num_clusters = m_clusterizers[0].get_codebook_size(); m_clusterizers[0].retrieve_clusters(num_clusters, cluster_indices); } return !m_canceled; } private: task_pool* m_pTask_pool; crn_thread_id_t m_main_thread_id; struct create_clusters_task_state { create_clusters_task_state() : m_pWeighted_vecs(NULL), m_pIndices(NULL), m_max_clusters(0) { } const weighted_vec_array* m_pWeighted_vecs; crnlib::vector<uint>* m_pIndices; crnlib::vector< crnlib::vector<uint> > m_cluster_indices; uint m_max_clusters; }; typedef clusterizer<VectorType> vector_clusterizer; enum { cMaxClusterizers = 4 }; vector_clusterizer m_clusterizers[cMaxClusterizers]; bool m_cluster_task_displayed_progress; progress_callback_func m_pProgress_callback; void* m_pProgress_callback_data; bool m_canceled; static bool generate_codebook_progress_callback(uint percentage_completed, void* pData) { threaded_clusterizer* pClusterizer = static_cast<threaded_clusterizer*>(pData); if (!pClusterizer->m_pProgress_callback) return true; if (!pClusterizer->m_pProgress_callback(percentage_completed, pClusterizer->m_pProgress_callback_data)) { pClusterizer->m_canceled = true; return false; } return true; } void compute_pca(VectorType& axis_res, VectorType& centroid_res, const weighted_vec_array& vecs, const vector<uint>& indices) { const uint N = VectorType::num_elements; VectorType centroid(0.0f); double total_weight = 0.0f; for (uint i = 0; i < indices.size(); i++) { const weighted_vec& v = vecs[indices[i]]; centroid += v.m_vec * static_cast<float>(v.m_weight); total_weight += v.m_weight; } if (total_weight == 0.0f) { axis_res.clear(); centroid_res = centroid; return; } double one_over_total_weight = 1.0f / total_weight; for (uint i = 0; i < N; i++) centroid[i] = static_cast<float>(centroid[i] * one_over_total_weight); matrix<N, N, float> covar; covar.clear(); for (uint i = 0; i < indices.size(); i++) { const weighted_vec& weighted_vec = vecs[indices[i]]; const VectorType v(weighted_vec.m_vec - centroid); const VectorType w(v * static_cast<float>(weighted_vec.m_weight)); for (uint x = 0; x < N; x++) for (uint y = x; y < N; y++) covar[x][y] = covar[x][y] + v[x] * w[y]; } for (uint x = 0; x < N; x++) for (uint y = x; y < N; y++) covar[x][y] = static_cast<float>(covar[x][y] * one_over_total_weight); for (uint x = 0; x < (N - 1); x++) for (uint y = x + 1; y < N; y++) covar[y][x] = covar[x][y]; VectorType axis; for (uint i = 0; i < N; i++) axis[i] = math::lerp(.75f, 1.25f, i * (1.0f / (N - 1))); VectorType prev_axis(axis); const uint cMaxIterations = 10; for (uint iter = 0; iter < cMaxIterations; iter++) { VectorType x; double max_sum = 0; for (uint i = 0; i < N; i++) { double sum = 0; for (uint j = 0; j < N; j++) sum += axis[j] * covar[i][j]; x[i] = static_cast<float>(sum); max_sum = math::maximum(max_sum, fabs(sum)); } if (max_sum != 0.0f) x *= static_cast<float>(1.0f / max_sum); VectorType delta_axis(prev_axis - x); prev_axis = axis; axis = x; if (delta_axis.norm() < .0025f) break; } axis.normalize(); axis_res = axis; centroid_res = centroid; } void compute_division( const VectorType& axis, const VectorType& centroid, const weighted_vec_array& vecs, const vector<uint>& indices, vector<uint>& left_indices, vector<uint>& right_indices) { left_indices.resize(0); right_indices.resize(0); for (uint i = 0; i < indices.size(); i++) { const uint vec_index = indices[i]; const VectorType v(vecs[vec_index].m_vec - centroid); float t = v * axis; if (t < 0.0f) left_indices.push_back(vec_index); else right_indices.push_back(vec_index); } } void compute_split( const weighted_vec_array& vecs, const vector<uint>& indices, vector<uint>& left_indices, vector<uint>& right_indices) { VectorType axis, centroid; compute_pca(axis, centroid, vecs, indices); compute_division(axis, centroid, vecs, indices, left_indices, right_indices); } static bool generate_codebook_dummy_progress_callback(uint percentage_completed, void* pData) { percentage_completed; if (static_cast<threaded_clusterizer*>(pData)->m_canceled) return false; return true; } void create_clusters_task(uint64 data, void* pData_ptr) { if (m_canceled) return; const uint partition_index = static_cast<uint>(data); create_clusters_task_state& state = *static_cast<create_clusters_task_state*>(pData_ptr); m_clusterizers[partition_index].clear(); for (uint i = 0; i < state.m_pIndices->size(); i++) { const uint index = (*state.m_pIndices)[i]; const weighted_vec& v = (*state.m_pWeighted_vecs)[index]; m_clusterizers[partition_index].add_training_vec(v.m_vec, v.m_weight); } if (m_canceled) return; const bool is_main_thread = (crn_get_current_thread_id() == m_main_thread_id); const bool quick = false; m_clusterizers[partition_index].generate_codebook( state.m_max_clusters, (is_main_thread && !m_cluster_task_displayed_progress) ? generate_codebook_progress_callback : generate_codebook_dummy_progress_callback, this, quick); if (is_main_thread) m_cluster_task_displayed_progress = true; if (m_canceled) return; const uint num_clusters = m_clusterizers[partition_index].get_codebook_size(); m_clusterizers[partition_index].retrieve_clusters(num_clusters, state.m_cluster_indices); for (uint i = 0; i < state.m_cluster_indices.size(); i++) { crnlib::vector<uint>& indices = state.m_cluster_indices[i]; for (uint j = 0; j < indices.size(); j++) indices[j] = (*state.m_pIndices)[indices[j]]; } } }; } // namespace crnlib