xenia/third_party/crunch/crnlib/crn_threaded_clusterizer.h

// File: crn_threaded_clusterizer.h
// See Copyright Notice and license at the end of inc/crnlib.h
#pragma once
#include "crn_clusterizer.h"
#include "crn_threading.h"

namespace crnlib
{
   template<typename VectorType>
   class threaded_clusterizer
   {
      CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(threaded_clusterizer);

   public:
      // Binds this clusterizer to the task pool that will run the per-partition
      // clustering tasks. Only the address of tp is stored, so tp must stay valid
      // for as long as create_clusters() may be called on this object.
      //
      // Every scalar member is given a defined value here (listed in declaration
      // order) so that no member is ever read while indeterminate, even if the
      // object is inspected before the first create_clusters() call.
      threaded_clusterizer(task_pool& tp) :
         m_pTask_pool(&tp),
         m_main_thread_id(),
         m_cluster_task_displayed_progress(false),
         m_pProgress_callback(NULL),
         m_pProgress_callback_data(NULL),
         m_canceled(false)
      {
      }

      void clear()
      {
         for (uint i = 0; i < cMaxClusterizers; i++)
            m_clusterizers[i].clear();
      }

      // A training sample paired with an integer weight. The weight scales the
      // sample's contribution to the centroid and covariance in compute_pca() and
      // is forwarded to the underlying clusterizer via add_training_vec().
      struct weighted_vec
      {
         // m_vec is default-constructed; the weight is zeroed so a default-constructed
         // entry never carries an indeterminate weight.
         weighted_vec() : m_weight(0) { }
         weighted_vec(const VectorType& v, uint w) : m_vec(v), m_weight(w) { }

         VectorType m_vec;
         uint m_weight;
      };
      // Array of weighted training samples, as consumed by create_clusters().
      typedef crnlib::vector<weighted_vec> weighted_vec_array;

      // User progress callback. It receives a completion percentage and the opaque
      // pointer that was passed to create_clusters(). Returning false cancels the
      // operation (see generate_codebook_progress_callback()).
      typedef bool (*progress_callback_func)(uint percentage_completed, void* pProgress_data);

      bool create_clusters(
         const weighted_vec_array& weighted_vecs,
         uint max_clusters, crnlib::vector< crnlib::vector<uint> >& cluster_indices,
         progress_callback_func pProgress_callback,
         void* pProgress_callback_data)
      {
         m_main_thread_id = crn_get_current_thread_id();
         m_canceled = false;
         m_pProgress_callback = pProgress_callback;
         m_pProgress_callback_data = pProgress_callback_data;

         if (max_clusters >= 128)
         {
            crnlib::vector<uint> primary_indices(weighted_vecs.size());
            for (uint i = 0; i < weighted_vecs.size(); i++)
               primary_indices[i] = i;

            CRNLIB_ASSUME(cMaxClusterizers == 4);

            crnlib::vector<uint> indices[6];

            compute_split(weighted_vecs, primary_indices, indices[0], indices[1]);
            compute_split(weighted_vecs, indices[0], indices[2], indices[3]);
            compute_split(weighted_vecs, indices[1], indices[4], indices[5]);

            create_clusters_task_state task_state[4];

            m_cluster_task_displayed_progress = false;

            uint total_partitions = 0;
            for (uint i = 0; i < 4; i++)
            {
               const uint num_indices = indices[2 + i].size();
               if (num_indices)
                  total_partitions++;
            }

            for (uint i = 0; i < 4; i++)
            {
               const uint num_indices = indices[2 + i].size();
               if (!num_indices)
                  continue;

               task_state[i].m_pWeighted_vecs = &weighted_vecs;
               task_state[i].m_pIndices = &indices[2 + i];
               task_state[i].m_max_clusters = (max_clusters + (total_partitions / 2)) / total_partitions;

               m_pTask_pool->queue_object_task(this, &threaded_clusterizer::create_clusters_task, i, &task_state[i]);
            }

            m_pTask_pool->join();

            if (m_canceled)
               return false;

            uint total_clusters = 0;
            for (uint i = 0; i < 4; i++)
               total_clusters += task_state[i].m_cluster_indices.size();

            cluster_indices.reserve(total_clusters);
            cluster_indices.resize(0);

            for (uint i = 0; i < 4; i++)
            {
               const uint ofs = cluster_indices.size();

               cluster_indices.resize(ofs + task_state[i].m_cluster_indices.size());

               for (uint j = 0; j < task_state[i].m_cluster_indices.size(); j++)
               {
                  cluster_indices[ofs + j].swap( task_state[i].m_cluster_indices[j] );
               }
            }
         }
         else
         {
            m_clusterizers[0].clear();
            m_clusterizers[0].get_training_vecs().reserve(weighted_vecs.size());

            for (uint i = 0; i < weighted_vecs.size(); i++)
            {
               const weighted_vec& v = weighted_vecs[i];

               m_clusterizers[0].add_training_vec(v.m_vec, v.m_weight);
            }

            m_clusterizers[0].generate_codebook(max_clusters, generate_codebook_progress_callback, this, false);//m_params.m_dxt_quality <= cCRNDXTQualityFast);

            const uint num_clusters = m_clusterizers[0].get_codebook_size();

            m_clusterizers[0].retrieve_clusters(num_clusters, cluster_indices);
         }

         return !m_canceled;
      }

   private:
      // Pool used by create_clusters() to queue and join the per-partition tasks.
      // Not owned; set from the constructor argument.
      task_pool* m_pTask_pool;

      // Id of the thread that most recently entered create_clusters(). Tasks compare
      // their own thread id against it to decide whether they may report progress.
      crn_thread_id_t m_main_thread_id;

      // Per-partition input/output record handed to create_clusters_task().
      struct create_clusters_task_state
      {
         create_clusters_task_state() : m_pWeighted_vecs(NULL), m_pIndices(NULL), m_max_clusters(0)
         {
         }

         // Input: the full training set (not owned).
         const weighted_vec_array*                 m_pWeighted_vecs;
         // Input: indices into *m_pWeighted_vecs that belong to this partition (not owned).
         crnlib::vector<uint>*                     m_pIndices;
         // Output: one list per cluster, holding indices into *m_pWeighted_vecs.
         crnlib::vector< crnlib::vector<uint> >    m_cluster_indices;
         // Input: cluster budget for this partition.
         uint                                      m_max_clusters;
      };

      typedef clusterizer<VectorType> vector_clusterizer;

      // One clusterizer per leaf partition; slot 0 is also used by the
      // non-partitioned path in create_clusters().
      enum { cMaxClusterizers = 4 };
      vector_clusterizer m_clusterizers[cMaxClusterizers];
      // Reset to false before tasks are queued; set to true once a task running on
      // the main thread has finished generating its codebook. While false, a
      // main-thread task forwards progress to the user callback.
      bool m_cluster_task_displayed_progress;

      // User callback and its opaque data, captured by create_clusters(). The
      // callback may be NULL.
      progress_callback_func m_pProgress_callback;
      void* m_pProgress_callback_data;
      // Set when the user callback returns false; polled by the tasks and by the
      // dummy progress callback.
      // NOTE(review): this is a plain bool that tasks read while the callback may
      // write it — confirm task_pool usage makes that safe.
      bool m_canceled;

      // Progress hook passed to clusterizer::generate_codebook(). pData is the owning
      // threaded_clusterizer. Forwards the percentage to the user callback, if one
      // was supplied. When the user callback returns false, the cancel flag is
      // raised and false is returned; otherwise returns true.
      static bool generate_codebook_progress_callback(uint percentage_completed, void* pData)
      {
         threaded_clusterizer& self = *static_cast<threaded_clusterizer*>(pData);

         const bool keep_going =
            !self.m_pProgress_callback ||
            self.m_pProgress_callback(percentage_completed, self.m_pProgress_callback_data);

         if (keep_going)
            return true;

         self.m_canceled = true;
         return false;
      }

      // Estimates the weighted centroid and the dominant axis of the vectors
      // selected by indices.
      //
      // axis_res     - receives the normalized axis estimate, or a cleared vector
      //                when the total weight is zero.
      // centroid_res - receives the weighted mean. When the total weight is zero it
      //                receives the accumulated sum without normalization.
      // vecs         - the full training set.
      // indices      - which entries of vecs to include.
      void compute_pca(VectorType& axis_res, VectorType& centroid_res, const weighted_vec_array& vecs, const vector<uint>& indices)
      {
         const uint N = VectorType::num_elements;

         // Weighted sum of the samples and of the weights.
         VectorType centroid(0.0f);
         double total_weight = 0.0f;
         for (uint i = 0; i < indices.size(); i++)
         {
            const weighted_vec& v = vecs[indices[i]];
            centroid += v.m_vec * static_cast<float>(v.m_weight);
            total_weight += v.m_weight;
         }

         // No samples, or all weights zero: there is no meaningful axis.
         if (total_weight == 0.0f)
         {
            axis_res.clear();
            centroid_res = centroid;
            return;
         }

         double one_over_total_weight = 1.0f / total_weight;
         for (uint i = 0; i < N; i++)
            centroid[i] = static_cast<float>(centroid[i] * one_over_total_weight);

         // Weighted covariance matrix. Only the upper triangle is accumulated and
         // scaled; it is mirrored into the lower triangle afterwards.
         matrix<N, N, float> covar;
         covar.clear();

         for (uint i = 0; i < indices.size(); i++)
         {
            const weighted_vec& sample = vecs[indices[i]];

            const VectorType v(sample.m_vec - centroid);
            const VectorType w(v * static_cast<float>(sample.m_weight));

            for (uint x = 0; x < N; x++)
               for (uint y = x; y < N; y++)
                  covar[x][y] = covar[x][y] + v[x] * w[y];
         }

         for (uint x = 0; x < N; x++)
            for (uint y = x; y < N; y++)
               covar[x][y] = static_cast<float>(covar[x][y] * one_over_total_weight);

         for (uint x = 0; x < (N - 1); x++)
            for (uint y = x + 1; y < N; y++)
               covar[y][x] = covar[x][y];

         // Starting guess: components ramp from .75 to 1.25. For one-element vectors
         // there is no ramp, and dividing by (N - 1) would produce 0 * inf = NaN, so
         // the step is forced to zero in that case.
         const float axis_step = (N > 1) ? (1.0f / (N - 1)) : 0.0f;

         VectorType axis;
         for (uint i = 0; i < N; i++)
            axis[i] = math::lerp(.75f, 1.25f, i * axis_step);

         VectorType prev_axis(axis);

         // Power iteration: repeatedly multiply by the covariance matrix and rescale
         // by the largest absolute component.
         const uint cMaxIterations = 10;
         for (uint iter = 0; iter < cMaxIterations; iter++)
         {
            VectorType x;

            double max_sum = 0;

            for (uint i = 0; i < N; i++)
            {
               double sum = 0;

               for (uint j = 0; j < N; j++)
                  sum += axis[j] * covar[i][j];

               x[i] = static_cast<float>(sum);

               max_sum = math::maximum(max_sum, fabs(sum));
            }

            if (max_sum != 0.0f)
               x *= static_cast<float>(1.0f / max_sum);

            // NOTE(review): prev_axis is updated to the *old* axis below, so from the
            // second iteration on this compares against the estimate from two steps
            // back. Presumably intentional; kept as-is to preserve results.
            VectorType delta_axis(prev_axis - x);

            prev_axis = axis;
            axis = x;

            if (delta_axis.norm() < .0025f)
               break;
         }

         axis.normalize();

         axis_res = axis;
         centroid_res = centroid;
      }

      void compute_division(
         const VectorType& axis, const VectorType& centroid, const weighted_vec_array& vecs, const vector<uint>& indices,
         vector<uint>& left_indices,
         vector<uint>& right_indices)
      {
         left_indices.resize(0);
         right_indices.resize(0);

         for (uint i = 0; i < indices.size(); i++)
         {
            const uint vec_index = indices[i];
            const VectorType v(vecs[vec_index].m_vec - centroid);

            float t = v * axis;
            if (t < 0.0f)
               left_indices.push_back(vec_index);
            else
               right_indices.push_back(vec_index);
         }
      }

      void compute_split(
         const weighted_vec_array& vecs, const vector<uint>& indices,
         vector<uint>& left_indices,
         vector<uint>& right_indices)
      {
         VectorType axis, centroid;
         compute_pca(axis, centroid, vecs, indices);

         compute_division(axis, centroid, vecs, indices, left_indices, right_indices);
      }

      // Progress hook used by tasks that must not report progress to the user. It
      // ignores the percentage and only relays cancellation: returns false once the
      // owning threaded_clusterizer (pData) has been canceled, true otherwise.
      static bool generate_codebook_dummy_progress_callback(uint percentage_completed, void* pData)
      {
         // Explicitly discard the unused parameter. A bare expression statement
         // triggers unused-value warnings on some compilers.
         (void)percentage_completed;

         return !static_cast<threaded_clusterizer*>(pData)->m_canceled;
      }

      // Task-pool entry point that clusters one partition.
      //
      // data      - partition index; selects m_clusterizers[data].
      // pData_ptr - points at this partition's create_clusters_task_state. Its
      //             m_cluster_indices member receives the result, expressed as
      //             indices into the full training set.
      //
      // Returns early, leaving the state's output untouched, whenever the
      // operation has been canceled.
      void create_clusters_task(uint64 data, void* pData_ptr)
      {
         if (m_canceled)
            return;

         const uint partition_index = static_cast<uint>(data);
         create_clusters_task_state& state = *static_cast<create_clusters_task_state*>(pData_ptr);

         const crnlib::vector<uint>& partition_indices = *state.m_pIndices;
         vector_clusterizer& part_clusterizer = m_clusterizers[partition_index];

         part_clusterizer.clear();
         // Size the training set up front, as the non-partitioned path in
         // create_clusters() does, to avoid repeated growth while adding vectors.
         part_clusterizer.get_training_vecs().reserve(partition_indices.size());

         for (uint i = 0; i < partition_indices.size(); i++)
         {
            const weighted_vec& v = (*state.m_pWeighted_vecs)[partition_indices[i]];

            part_clusterizer.add_training_vec(v.m_vec, v.m_weight);
         }

         if (m_canceled)
            return;

         // Only a task running on the thread that called create_clusters() forwards
         // progress to the user callback, and only until one such task has finished
         // generating its codebook. Every other task uses the dummy callback, which
         // merely relays cancellation.
         const bool is_main_thread = (crn_get_current_thread_id() == m_main_thread_id);

         const bool quick = false;
         part_clusterizer.generate_codebook(
            state.m_max_clusters,
            (is_main_thread && !m_cluster_task_displayed_progress) ? generate_codebook_progress_callback : generate_codebook_dummy_progress_callback,
            this,
            quick);

         if (is_main_thread)
            m_cluster_task_displayed_progress = true;

         if (m_canceled)
            return;

         const uint num_clusters = part_clusterizer.get_codebook_size();

         part_clusterizer.retrieve_clusters(num_clusters, state.m_cluster_indices);

         // retrieve_clusters() yields positions within this partition's training
         // set; translate them back to indices into the full training set.
         for (uint i = 0; i < state.m_cluster_indices.size(); i++)
         {
            crnlib::vector<uint>& cluster = state.m_cluster_indices[i];

            for (uint j = 0; j < cluster.size(); j++)
               cluster[j] = partition_indices[cluster[j]];
         }
      }

   };

} // namespace crnlib
Adding Crunch at r319. 2014-01-21 07:02:34 +00:00			`// File: crn_threaded_clusterizer.h`
			`// See Copyright Notice and license at the end of inc/crnlib.h`
			`#pragma once`
			`#include "crn_clusterizer.h"`
			`#include "crn_threading.h"`

			`namespace crnlib`
			`{`
			`template<typename VectorType>`
			`class threaded_clusterizer`
			`{`
			`CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(threaded_clusterizer);`

			`public:`
			`threaded_clusterizer(task_pool& tp) :`
			`m_pTask_pool(&tp),`
			`m_pProgress_callback(NULL),`
			`m_pProgress_callback_data(NULL),`
			`m_canceled(false)`
			`{`
			`}`

			`void clear()`
			`{`
			`for (uint i = 0; i < cMaxClusterizers; i++)`
			`m_clusterizers[i].clear();`
			`}`

			`struct weighted_vec`
			`{`
			`weighted_vec() { }`
			`weighted_vec(const VectorType& v, uint w) : m_vec(v), m_weight(w) { }`

			`VectorType m_vec;`
			`uint m_weight;`
			`};`
			`typedef crnlib::vector<weighted_vec> weighted_vec_array;`

			`typedef bool (*progress_callback_func)(uint percentage_completed, void* pProgress_data);`

			`bool create_clusters(`
			`const weighted_vec_array& weighted_vecs,`
			`uint max_clusters, crnlib::vector< crnlib::vector<uint> >& cluster_indices,`
			`progress_callback_func pProgress_callback,`
			`void* pProgress_callback_data)`
			`{`
			`m_main_thread_id = crn_get_current_thread_id();`
			`m_canceled = false;`
			`m_pProgress_callback = pProgress_callback;`
			`m_pProgress_callback_data = pProgress_callback_data;`

			`if (max_clusters >= 128)`
			`{`
			`crnlib::vector<uint> primary_indices(weighted_vecs.size());`
			`for (uint i = 0; i < weighted_vecs.size(); i++)`
			`primary_indices[i] = i;`

			`CRNLIB_ASSUME(cMaxClusterizers == 4);`

			`crnlib::vector<uint> indices[6];`

			`compute_split(weighted_vecs, primary_indices, indices[0], indices[1]);`
			`compute_split(weighted_vecs, indices[0], indices[2], indices[3]);`
			`compute_split(weighted_vecs, indices[1], indices[4], indices[5]);`

			`create_clusters_task_state task_state[4];`

			`m_cluster_task_displayed_progress = false;`

			`uint total_partitions = 0;`
			`for (uint i = 0; i < 4; i++)`
			`{`
			`const uint num_indices = indices[2 + i].size();`
			`if (num_indices)`
			`total_partitions++;`
			`}`

			`for (uint i = 0; i < 4; i++)`
			`{`
			`const uint num_indices = indices[2 + i].size();`
			`if (!num_indices)`
			`continue;`

			`task_state[i].m_pWeighted_vecs = &weighted_vecs;`
			`task_state[i].m_pIndices = &indices[2 + i];`
			`task_state[i].m_max_clusters = (max_clusters + (total_partitions / 2)) / total_partitions;`

			`m_pTask_pool->queue_object_task(this, &threaded_clusterizer::create_clusters_task, i, &task_state[i]);`
			`}`

			`m_pTask_pool->join();`

			`if (m_canceled)`
			`return false;`

			`uint total_clusters = 0;`
			`for (uint i = 0; i < 4; i++)`
			`total_clusters += task_state[i].m_cluster_indices.size();`

			`cluster_indices.reserve(total_clusters);`
			`cluster_indices.resize(0);`

			`for (uint i = 0; i < 4; i++)`
			`{`
			`const uint ofs = cluster_indices.size();`

			`cluster_indices.resize(ofs + task_state[i].m_cluster_indices.size());`

			`for (uint j = 0; j < task_state[i].m_cluster_indices.size(); j++)`
			`{`
			`cluster_indices[ofs + j].swap( task_state[i].m_cluster_indices[j] );`
			`}`
			`}`
			`}`
			`else`
			`{`
			`m_clusterizers[0].clear();`
			`m_clusterizers[0].get_training_vecs().reserve(weighted_vecs.size());`

			`for (uint i = 0; i < weighted_vecs.size(); i++)`
			`{`
			`const weighted_vec& v = weighted_vecs[i];`

			`m_clusterizers[0].add_training_vec(v.m_vec, v.m_weight);`
			`}`

			`m_clusterizers[0].generate_codebook(max_clusters, generate_codebook_progress_callback, this, false);//m_params.m_dxt_quality <= cCRNDXTQualityFast);`

			`const uint num_clusters = m_clusterizers[0].get_codebook_size();`

			`m_clusterizers[0].retrieve_clusters(num_clusters, cluster_indices);`
			`}`

			`return !m_canceled;`
			`}`

			`private:`
			`task_pool* m_pTask_pool;`

			`crn_thread_id_t m_main_thread_id;`

			`struct create_clusters_task_state`
			`{`
			`create_clusters_task_state() : m_pWeighted_vecs(NULL), m_pIndices(NULL), m_max_clusters(0)`
			`{`
			`}`

			`const weighted_vec_array* m_pWeighted_vecs;`
			`crnlib::vector<uint>* m_pIndices;`
			`crnlib::vector< crnlib::vector<uint> > m_cluster_indices;`
			`uint m_max_clusters;`
			`};`

			`typedef clusterizer<VectorType> vector_clusterizer;`

			`enum { cMaxClusterizers = 4 };`
			`vector_clusterizer m_clusterizers[cMaxClusterizers];`
			`bool m_cluster_task_displayed_progress;`

			`progress_callback_func m_pProgress_callback;`
			`void* m_pProgress_callback_data;`
			`bool m_canceled;`

			`static bool generate_codebook_progress_callback(uint percentage_completed, void* pData)`
			`{`
			`threaded_clusterizer* pClusterizer = static_cast<threaded_clusterizer*>(pData);`

			`if (!pClusterizer->m_pProgress_callback)`
			`return true;`

			`if (!pClusterizer->m_pProgress_callback(percentage_completed, pClusterizer->m_pProgress_callback_data))`
			`{`
			`pClusterizer->m_canceled = true;`
			`return false;`
			`}`
			`return true;`
			`}`

			`void compute_pca(VectorType& axis_res, VectorType& centroid_res, const weighted_vec_array& vecs, const vector<uint>& indices)`
			`{`
			`const uint N = VectorType::num_elements;`

			`VectorType centroid(0.0f);`
			`double total_weight = 0.0f;`
			`for (uint i = 0; i < indices.size(); i++)`
			`{`
			`const weighted_vec& v = vecs[indices[i]];`
			`centroid += v.m_vec * static_cast<float>(v.m_weight);`
			`total_weight += v.m_weight;`
			`}`

			`if (total_weight == 0.0f)`
			`{`
			`axis_res.clear();`
			`centroid_res = centroid;`
			`return;`
			`}`

			`double one_over_total_weight = 1.0f / total_weight;`
			`for (uint i = 0; i < N; i++)`
			`centroid[i] = static_cast<float>(centroid[i] * one_over_total_weight);`

			`matrix<N, N, float> covar;`
			`covar.clear();`

			`for (uint i = 0; i < indices.size(); i++)`
			`{`
			`const weighted_vec& weighted_vec = vecs[indices[i]];`

			`const VectorType v(weighted_vec.m_vec - centroid);`
			`const VectorType w(v * static_cast<float>(weighted_vec.m_weight));`

			`for (uint x = 0; x < N; x++)`
			`for (uint y = x; y < N; y++)`
			`covar[x][y] = covar[x][y] + v[x] * w[y];`
			`}`

			`for (uint x = 0; x < N; x++)`
			`for (uint y = x; y < N; y++)`
			`covar[x][y] = static_cast<float>(covar[x][y] * one_over_total_weight);`

			`for (uint x = 0; x < (N - 1); x++)`
			`for (uint y = x + 1; y < N; y++)`
			`covar[y][x] = covar[x][y];`

			`VectorType axis;`
			`for (uint i = 0; i < N; i++)`
			`axis[i] = math::lerp(.75f, 1.25f, i * (1.0f / (N - 1)));`

			`VectorType prev_axis(axis);`

			`const uint cMaxIterations = 10;`
			`for (uint iter = 0; iter < cMaxIterations; iter++)`
			`{`
			`VectorType x;`

			`double max_sum = 0;`

			`for (uint i = 0; i < N; i++)`
			`{`
			`double sum = 0;`

			`for (uint j = 0; j < N; j++)`
			`sum += axis[j] * covar[i][j];`

			`x[i] = static_cast<float>(sum);`

			`max_sum = math::maximum(max_sum, fabs(sum));`
			`}`

			`if (max_sum != 0.0f)`
			`x *= static_cast<float>(1.0f / max_sum);`

			`VectorType delta_axis(prev_axis - x);`

			`prev_axis = axis;`
			`axis = x;`

			`if (delta_axis.norm() < .0025f)`
			`break;`
			`}`

			`axis.normalize();`

			`axis_res = axis;`
			`centroid_res = centroid;`
			`}`

			`void compute_division(`
			`const VectorType& axis, const VectorType& centroid, const weighted_vec_array& vecs, const vector<uint>& indices,`
			`vector<uint>& left_indices,`
			`vector<uint>& right_indices)`
			`{`
			`left_indices.resize(0);`
			`right_indices.resize(0);`

			`for (uint i = 0; i < indices.size(); i++)`
			`{`
			`const uint vec_index = indices[i];`
			`const VectorType v(vecs[vec_index].m_vec - centroid);`

			`float t = v * axis;`
			`if (t < 0.0f)`
			`left_indices.push_back(vec_index);`
			`else`
			`right_indices.push_back(vec_index);`
			`}`
			`}`

			`void compute_split(`
			`const weighted_vec_array& vecs, const vector<uint>& indices,`
			`vector<uint>& left_indices,`
			`vector<uint>& right_indices)`
			`{`
			`VectorType axis, centroid;`
			`compute_pca(axis, centroid, vecs, indices);`

			`compute_division(axis, centroid, vecs, indices, left_indices, right_indices);`
			`}`

			`static bool generate_codebook_dummy_progress_callback(uint percentage_completed, void* pData)`
			`{`
			`percentage_completed;`

			`if (static_cast<threaded_clusterizer*>(pData)->m_canceled)`
			`return false;`

			`return true;`
			`}`

			`void create_clusters_task(uint64 data, void* pData_ptr)`
			`{`
			`if (m_canceled)`
			`return;`

			`const uint partition_index = static_cast<uint>(data);`
			`create_clusters_task_state& state = *static_cast<create_clusters_task_state*>(pData_ptr);`

			`m_clusterizers[partition_index].clear();`

			`for (uint i = 0; i < state.m_pIndices->size(); i++)`
			`{`
			`const uint index = (*state.m_pIndices)[i];`
			`const weighted_vec& v = (*state.m_pWeighted_vecs)[index];`

			`m_clusterizers[partition_index].add_training_vec(v.m_vec, v.m_weight);`
			`}`

			`if (m_canceled)`
			`return;`

			`const bool is_main_thread = (crn_get_current_thread_id() == m_main_thread_id);`

			`const bool quick = false;`
			`m_clusterizers[partition_index].generate_codebook(`
			`state.m_max_clusters,`
			`(is_main_thread && !m_cluster_task_displayed_progress) ? generate_codebook_progress_callback : generate_codebook_dummy_progress_callback,`
			`this,`
			`quick);`

			`if (is_main_thread)`
			`m_cluster_task_displayed_progress = true;`

			`if (m_canceled)`
			`return;`

			`const uint num_clusters = m_clusterizers[partition_index].get_codebook_size();`

			`m_clusterizers[partition_index].retrieve_clusters(num_clusters, state.m_cluster_indices);`

			`for (uint i = 0; i < state.m_cluster_indices.size(); i++)`
			`{`
			`crnlib::vector<uint>& indices = state.m_cluster_indices[i];`

			`for (uint j = 0; j < indices.size(); j++)`
			`indices[j] = (*state.m_pIndices)[indices[j]];`
			`}`
			`}`

			`};`

			`} // namespace crnlib`