xenia/third_party/crunch/crnlib/crn_threaded_clusterizer.h

363 lines
11 KiB
C++

// File: crn_threaded_clusterizer.h
// See Copyright Notice and license at the end of inc/crnlib.h
#pragma once
#include "crn_clusterizer.h"
#include "crn_threading.h"
namespace crnlib
{
template<typename VectorType>
class threaded_clusterizer
{
CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(threaded_clusterizer);
public:
threaded_clusterizer(task_pool& tp) :
m_pTask_pool(&tp),
m_pProgress_callback(NULL),
m_pProgress_callback_data(NULL),
m_canceled(false)
{
}
void clear()
{
for (uint i = 0; i < cMaxClusterizers; i++)
m_clusterizers[i].clear();
}
struct weighted_vec
{
weighted_vec() { }
weighted_vec(const VectorType& v, uint w) : m_vec(v), m_weight(w) { }
VectorType m_vec;
uint m_weight;
};
typedef crnlib::vector<weighted_vec> weighted_vec_array;
typedef bool (*progress_callback_func)(uint percentage_completed, void* pProgress_data);
bool create_clusters(
const weighted_vec_array& weighted_vecs,
uint max_clusters, crnlib::vector< crnlib::vector<uint> >& cluster_indices,
progress_callback_func pProgress_callback,
void* pProgress_callback_data)
{
m_main_thread_id = crn_get_current_thread_id();
m_canceled = false;
m_pProgress_callback = pProgress_callback;
m_pProgress_callback_data = pProgress_callback_data;
if (max_clusters >= 128)
{
crnlib::vector<uint> primary_indices(weighted_vecs.size());
for (uint i = 0; i < weighted_vecs.size(); i++)
primary_indices[i] = i;
CRNLIB_ASSUME(cMaxClusterizers == 4);
crnlib::vector<uint> indices[6];
compute_split(weighted_vecs, primary_indices, indices[0], indices[1]);
compute_split(weighted_vecs, indices[0], indices[2], indices[3]);
compute_split(weighted_vecs, indices[1], indices[4], indices[5]);
create_clusters_task_state task_state[4];
m_cluster_task_displayed_progress = false;
uint total_partitions = 0;
for (uint i = 0; i < 4; i++)
{
const uint num_indices = indices[2 + i].size();
if (num_indices)
total_partitions++;
}
for (uint i = 0; i < 4; i++)
{
const uint num_indices = indices[2 + i].size();
if (!num_indices)
continue;
task_state[i].m_pWeighted_vecs = &weighted_vecs;
task_state[i].m_pIndices = &indices[2 + i];
task_state[i].m_max_clusters = (max_clusters + (total_partitions / 2)) / total_partitions;
m_pTask_pool->queue_object_task(this, &threaded_clusterizer::create_clusters_task, i, &task_state[i]);
}
m_pTask_pool->join();
if (m_canceled)
return false;
uint total_clusters = 0;
for (uint i = 0; i < 4; i++)
total_clusters += task_state[i].m_cluster_indices.size();
cluster_indices.reserve(total_clusters);
cluster_indices.resize(0);
for (uint i = 0; i < 4; i++)
{
const uint ofs = cluster_indices.size();
cluster_indices.resize(ofs + task_state[i].m_cluster_indices.size());
for (uint j = 0; j < task_state[i].m_cluster_indices.size(); j++)
{
cluster_indices[ofs + j].swap( task_state[i].m_cluster_indices[j] );
}
}
}
else
{
m_clusterizers[0].clear();
m_clusterizers[0].get_training_vecs().reserve(weighted_vecs.size());
for (uint i = 0; i < weighted_vecs.size(); i++)
{
const weighted_vec& v = weighted_vecs[i];
m_clusterizers[0].add_training_vec(v.m_vec, v.m_weight);
}
m_clusterizers[0].generate_codebook(max_clusters, generate_codebook_progress_callback, this, false);//m_params.m_dxt_quality <= cCRNDXTQualityFast);
const uint num_clusters = m_clusterizers[0].get_codebook_size();
m_clusterizers[0].retrieve_clusters(num_clusters, cluster_indices);
}
return !m_canceled;
}
private:
task_pool* m_pTask_pool;
crn_thread_id_t m_main_thread_id;
struct create_clusters_task_state
{
create_clusters_task_state() : m_pWeighted_vecs(NULL), m_pIndices(NULL), m_max_clusters(0)
{
}
const weighted_vec_array* m_pWeighted_vecs;
crnlib::vector<uint>* m_pIndices;
crnlib::vector< crnlib::vector<uint> > m_cluster_indices;
uint m_max_clusters;
};
typedef clusterizer<VectorType> vector_clusterizer;
enum { cMaxClusterizers = 4 };
vector_clusterizer m_clusterizers[cMaxClusterizers];
bool m_cluster_task_displayed_progress;
progress_callback_func m_pProgress_callback;
void* m_pProgress_callback_data;
bool m_canceled;
static bool generate_codebook_progress_callback(uint percentage_completed, void* pData)
{
threaded_clusterizer* pClusterizer = static_cast<threaded_clusterizer*>(pData);
if (!pClusterizer->m_pProgress_callback)
return true;
if (!pClusterizer->m_pProgress_callback(percentage_completed, pClusterizer->m_pProgress_callback_data))
{
pClusterizer->m_canceled = true;
return false;
}
return true;
}
void compute_pca(VectorType& axis_res, VectorType& centroid_res, const weighted_vec_array& vecs, const vector<uint>& indices)
{
const uint N = VectorType::num_elements;
VectorType centroid(0.0f);
double total_weight = 0.0f;
for (uint i = 0; i < indices.size(); i++)
{
const weighted_vec& v = vecs[indices[i]];
centroid += v.m_vec * static_cast<float>(v.m_weight);
total_weight += v.m_weight;
}
if (total_weight == 0.0f)
{
axis_res.clear();
centroid_res = centroid;
return;
}
double one_over_total_weight = 1.0f / total_weight;
for (uint i = 0; i < N; i++)
centroid[i] = static_cast<float>(centroid[i] * one_over_total_weight);
matrix<N, N, float> covar;
covar.clear();
for (uint i = 0; i < indices.size(); i++)
{
const weighted_vec& weighted_vec = vecs[indices[i]];
const VectorType v(weighted_vec.m_vec - centroid);
const VectorType w(v * static_cast<float>(weighted_vec.m_weight));
for (uint x = 0; x < N; x++)
for (uint y = x; y < N; y++)
covar[x][y] = covar[x][y] + v[x] * w[y];
}
for (uint x = 0; x < N; x++)
for (uint y = x; y < N; y++)
covar[x][y] = static_cast<float>(covar[x][y] * one_over_total_weight);
for (uint x = 0; x < (N - 1); x++)
for (uint y = x + 1; y < N; y++)
covar[y][x] = covar[x][y];
VectorType axis;
for (uint i = 0; i < N; i++)
axis[i] = math::lerp(.75f, 1.25f, i * (1.0f / (N - 1)));
VectorType prev_axis(axis);
const uint cMaxIterations = 10;
for (uint iter = 0; iter < cMaxIterations; iter++)
{
VectorType x;
double max_sum = 0;
for (uint i = 0; i < N; i++)
{
double sum = 0;
for (uint j = 0; j < N; j++)
sum += axis[j] * covar[i][j];
x[i] = static_cast<float>(sum);
max_sum = math::maximum(max_sum, fabs(sum));
}
if (max_sum != 0.0f)
x *= static_cast<float>(1.0f / max_sum);
VectorType delta_axis(prev_axis - x);
prev_axis = axis;
axis = x;
if (delta_axis.norm() < .0025f)
break;
}
axis.normalize();
axis_res = axis;
centroid_res = centroid;
}
void compute_division(
const VectorType& axis, const VectorType& centroid, const weighted_vec_array& vecs, const vector<uint>& indices,
vector<uint>& left_indices,
vector<uint>& right_indices)
{
left_indices.resize(0);
right_indices.resize(0);
for (uint i = 0; i < indices.size(); i++)
{
const uint vec_index = indices[i];
const VectorType v(vecs[vec_index].m_vec - centroid);
float t = v * axis;
if (t < 0.0f)
left_indices.push_back(vec_index);
else
right_indices.push_back(vec_index);
}
}
void compute_split(
const weighted_vec_array& vecs, const vector<uint>& indices,
vector<uint>& left_indices,
vector<uint>& right_indices)
{
VectorType axis, centroid;
compute_pca(axis, centroid, vecs, indices);
compute_division(axis, centroid, vecs, indices, left_indices, right_indices);
}
static bool generate_codebook_dummy_progress_callback(uint percentage_completed, void* pData)
{
percentage_completed;
if (static_cast<threaded_clusterizer*>(pData)->m_canceled)
return false;
return true;
}
void create_clusters_task(uint64 data, void* pData_ptr)
{
if (m_canceled)
return;
const uint partition_index = static_cast<uint>(data);
create_clusters_task_state& state = *static_cast<create_clusters_task_state*>(pData_ptr);
m_clusterizers[partition_index].clear();
for (uint i = 0; i < state.m_pIndices->size(); i++)
{
const uint index = (*state.m_pIndices)[i];
const weighted_vec& v = (*state.m_pWeighted_vecs)[index];
m_clusterizers[partition_index].add_training_vec(v.m_vec, v.m_weight);
}
if (m_canceled)
return;
const bool is_main_thread = (crn_get_current_thread_id() == m_main_thread_id);
const bool quick = false;
m_clusterizers[partition_index].generate_codebook(
state.m_max_clusters,
(is_main_thread && !m_cluster_task_displayed_progress) ? generate_codebook_progress_callback : generate_codebook_dummy_progress_callback,
this,
quick);
if (is_main_thread)
m_cluster_task_displayed_progress = true;
if (m_canceled)
return;
const uint num_clusters = m_clusterizers[partition_index].get_codebook_size();
m_clusterizers[partition_index].retrieve_clusters(num_clusters, state.m_cluster_indices);
for (uint i = 0; i < state.m_cluster_indices.size(); i++)
{
crnlib::vector<uint>& indices = state.m_cluster_indices[i];
for (uint j = 0; j < indices.size(); j++)
indices[j] = (*state.m_pIndices)[indices[j]];
}
}
};
} // namespace crnlib