// File: crn_dxt_hc.h // See Copyright Notice and license at the end of inc/crnlib.h #pragma once #include "crn_dxt1.h" #include "crn_dxt5a.h" #include "crn_dxt_endpoint_refiner.h" #include "crn_image.h" #include "crn_dxt.h" #include "crn_image.h" #include "crn_dxt_hc_common.h" #include "crn_tree_clusterizer.h" #include "crn_threading.h" #define CRN_NO_FUNCTION_DEFINITIONS #include "../inc/crnlib.h" namespace crnlib { const uint cTotalCompressionPhases = 25; class dxt_hc { public: dxt_hc(); ~dxt_hc(); struct pixel_chunk { pixel_chunk() { clear(); } dxt_pixel_block m_blocks[cChunkBlockHeight][cChunkBlockWidth]; const color_quad_u8& operator() (uint cx, uint cy) const { CRNLIB_ASSERT((cx < cChunkPixelWidth) && (cy < cChunkPixelHeight)); return m_blocks[cy >> cBlockPixelHeightShift][cx >> cBlockPixelWidthShift].m_pixels [cy & (cBlockPixelHeight - 1)][cx & (cBlockPixelWidth - 1)]; } color_quad_u8& operator() (uint cx, uint cy) { CRNLIB_ASSERT((cx < cChunkPixelWidth) && (cy < cChunkPixelHeight)); return m_blocks[cy >> cBlockPixelHeightShift][cx >> cBlockPixelWidthShift].m_pixels [cy & (cBlockPixelHeight - 1)][cx & (cBlockPixelWidth - 1)]; } inline void clear() { utils::zero_object(*this); m_weight = 1.0f; } float m_weight; }; typedef crnlib::vector pixel_chunk_vec; struct params { params() : m_color_endpoint_codebook_size(3072), m_color_selector_codebook_size(3072), m_alpha_endpoint_codebook_size(3072), m_alpha_selector_codebook_size(3072), m_adaptive_tile_color_psnr_derating(2.0f), // was 3.4f m_adaptive_tile_alpha_psnr_derating(2.0f), m_adaptive_tile_color_alpha_weighting_ratio(3.0f), m_num_levels(0), m_format(cDXT1), m_hierarchical(true), m_perceptual(true), m_debugging(false), m_pProgress_func(NULL), m_pProgress_func_data(NULL) { m_alpha_component_indices[0] = 3; m_alpha_component_indices[1] = 0; for (uint i = 0; i < cCRNMaxLevels; i++) { m_levels[i].m_first_chunk = 0; m_levels[i].m_num_chunks = 0; } } // Valid range for codebook sizes: [32,8192] (non-power of two values are okay) uint m_color_endpoint_codebook_size; uint m_color_selector_codebook_size; uint m_alpha_endpoint_codebook_size; uint m_alpha_selector_codebook_size; // Higher values cause fewer 8x4, 4x8, and 4x4 blocks to be utilized less often (lower quality/smaller files). // Lower values cause the encoder to use large tiles less often (better quality/larger files). // Valid range: [0.0,100.0]. // A value of 0 will cause the encoder to only use tiles larger than 4x4 if doing so would incur to quality loss. float m_adaptive_tile_color_psnr_derating; float m_adaptive_tile_alpha_psnr_derating; float m_adaptive_tile_color_alpha_weighting_ratio; uint m_alpha_component_indices[2]; struct miplevel_desc { uint m_first_chunk; uint m_num_chunks; }; // The mip level data is optional! miplevel_desc m_levels[cCRNMaxLevels]; uint m_num_levels; dxt_format m_format; // If m_hierarchical is false, only 4x4 blocks will be used by the encoder (leading to higher quality/larger files). bool m_hierarchical; // If m_perceptual is true, perceptual color metrics will be used by the encoder. bool m_perceptual; bool m_debugging; crn_progress_callback_func m_pProgress_func; void* m_pProgress_func_data; }; void clear(); // Main compression function bool compress(const params& p, uint num_chunks, const pixel_chunk* pChunks, task_pool& task_pool); // Output accessors inline uint get_num_chunks() const { return m_num_chunks; } struct chunk_encoding { chunk_encoding() { utils::zero_object(*this); }; // Index into g_chunk_encodings. uint8 m_encoding_index; // Number of tiles, endpoint indices. uint8 m_num_tiles; // Color, alpha0, alpha1 enum { cColorIndex = 0, cAlpha0Index = 1, cAlpha1Index = 2 }; uint16 m_endpoint_indices[3][cChunkMaxTiles]; uint16 m_selector_indices[3][cChunkBlockHeight][cChunkBlockWidth]; // [block_y][block_x] }; typedef crnlib::vector chunk_encoding_vec; inline const chunk_encoding& get_chunk_encoding(uint chunk_index) const { return m_chunk_encoding[chunk_index]; } inline const chunk_encoding_vec& get_chunk_encoding_vec() const { return m_chunk_encoding; } struct selectors { selectors() { utils::zero_object(*this); } uint8 m_selectors[cBlockPixelHeight][cBlockPixelWidth]; uint8 get_by_index(uint i) const { CRNLIB_ASSERT(i < (cBlockPixelWidth * cBlockPixelHeight)); const uint8* p = (const uint8*)m_selectors; return *(p + i); } void set_by_index(uint i, uint v) { CRNLIB_ASSERT(i < (cBlockPixelWidth * cBlockPixelHeight)); uint8* p = (uint8*)m_selectors; *(p + i) = static_cast(v); } }; typedef crnlib::vector selectors_vec; // Color endpoints inline uint get_color_endpoint_codebook_size() const { return m_color_endpoints.size(); } inline uint get_color_endpoint(uint codebook_index) const { return m_color_endpoints[codebook_index]; } const crnlib::vector& get_color_endpoint_vec() const { return m_color_endpoints; } // Color selectors uint get_color_selector_codebook_size() const { return m_color_selectors.size(); } const selectors& get_color_selectors(uint codebook_index) const { return m_color_selectors[codebook_index]; } const crnlib::vector& get_color_selectors_vec() const { return m_color_selectors; } // Alpha endpoints inline uint get_alpha_endpoint_codebook_size() const { return m_alpha_endpoints.size(); } inline uint get_alpha_endpoint(uint codebook_index) const { return m_alpha_endpoints[codebook_index]; } const crnlib::vector& get_alpha_endpoint_vec() const { return m_alpha_endpoints; } // Alpha selectors uint get_alpha_selector_codebook_size() const { return m_alpha_selectors.size(); } const selectors& get_alpha_selectors(uint codebook_index) const { return m_alpha_selectors[codebook_index]; } const crnlib::vector& get_alpha_selectors_vec() const { return m_alpha_selectors; } // Debug images const pixel_chunk_vec& get_compressed_chunk_pixels() const { return m_dbg_chunk_pixels; } const pixel_chunk_vec& get_compressed_chunk_pixels_tile_vis() const { return m_dbg_chunk_pixels_tile_vis; } const pixel_chunk_vec& get_compressed_chunk_pixels_color_quantized() const { return m_dbg_chunk_pixels_color_quantized; } const pixel_chunk_vec& get_compressed_chunk_pixels_alpha_quantized() const { return m_dbg_chunk_pixels_alpha_quantized; } const pixel_chunk_vec& get_compressed_chunk_pixels_final() const { return m_dbg_chunk_pixels_final; } const pixel_chunk_vec& get_compressed_chunk_pixels_orig_color_selectors() const { return m_dbg_chunk_pixels_orig_color_selectors; } const pixel_chunk_vec& get_compressed_chunk_pixels_quantized_color_selectors() const { return m_dbg_chunk_pixels_quantized_color_selectors; } const pixel_chunk_vec& get_compressed_chunk_pixels_final_color_selectors() const { return m_dbg_chunk_pixels_final_color_selectors; } const pixel_chunk_vec& get_compressed_chunk_pixels_orig_alpha_selectors() const { return m_dbg_chunk_pixels_orig_alpha_selectors; } const pixel_chunk_vec& get_compressed_chunk_pixels_quantized_alpha_selectors() const { return m_dbg_chunk_pixels_quantized_alpha_selectors; } const pixel_chunk_vec& get_compressed_chunk_pixels_final_alpha_selectors() const { return m_dbg_chunk_pixels_final_alpha_selectors; } static void create_debug_image_from_chunks(uint num_chunks_x, uint num_chunks_y, const pixel_chunk_vec& chunks, const chunk_encoding_vec *pChunk_encodings, image_u8& img, bool serpentine_scan, int comp_index = -1); private: params m_params; uint m_num_chunks; const pixel_chunk* m_pChunks; chunk_encoding_vec m_chunk_encoding; uint m_num_alpha_blocks; // 0, 1, or 2 bool m_has_color_blocks; bool m_has_alpha0_blocks; bool m_has_alpha1_blocks; struct compressed_tile { uint m_endpoint_cluster_index; uint m_first_endpoint; uint m_second_endpoint; uint8 m_selectors[cChunkPixelWidth * cChunkPixelHeight]; void set_selector(uint x, uint y, uint s) { CRNLIB_ASSERT((x < m_pixel_width) && (y < m_pixel_height)); m_selectors[x + y * m_pixel_width] = static_cast(s); } uint get_selector(uint x, uint y) const { CRNLIB_ASSERT((x < m_pixel_width) && (y < m_pixel_height)); return m_selectors[x + y * m_pixel_width]; } uint8 m_pixel_width; uint8 m_pixel_height; uint8 m_layout_index; bool m_alpha_encoding; }; struct compressed_chunk { compressed_chunk() { utils::zero_object(*this); } uint8 m_encoding_index; uint8 m_num_tiles; compressed_tile m_tiles[cChunkMaxTiles]; compressed_tile m_quantized_tiles[cChunkMaxTiles]; uint16 m_endpoint_cluster_index[cChunkMaxTiles]; uint16 m_selector_cluster_index[cChunkBlockHeight][cChunkBlockWidth]; }; typedef crnlib::vector compressed_chunk_vec; enum { cColorChunks = 0, cAlpha0Chunks = 1, cAlpha1Chunks = 2, cNumCompressedChunkVecs = 3 }; compressed_chunk_vec m_compressed_chunks[cNumCompressedChunkVecs]; volatile atomic32_t m_encoding_hist[cNumChunkEncodings]; atomic32_t m_total_tiles; void compress_dxt1_block( dxt1_endpoint_optimizer::results& results, uint chunk_index, const image_u8& chunk, uint x_ofs, uint y_ofs, uint width, uint height, uint8* pSelectors); void compress_dxt5_block( dxt5_endpoint_optimizer::results& results, uint chunk_index, const image_u8& chunk, uint x_ofs, uint y_ofs, uint width, uint height, uint component_index, uint8* pAlpha_selectors); void determine_compressed_chunks_task(uint64 data, void* pData_ptr); bool determine_compressed_chunks(); struct tile_cluster { tile_cluster() : m_first_endpoint(0), m_second_endpoint(0), m_error(0), m_alpha_encoding(false) { } // first = chunk, second = tile // if an alpha tile, second's upper 16 bits contains the alpha index (0 or 1) crnlib::vector< std::pair > m_tiles; uint m_first_endpoint; uint m_second_endpoint; uint64 m_error; bool m_alpha_encoding; }; typedef crnlib::vector tile_cluster_vec; tile_cluster_vec m_color_clusters; tile_cluster_vec m_alpha_clusters; selectors_vec m_color_selectors; selectors_vec m_alpha_selectors; // For each selector, this array indicates every chunk/tile/tile block that use this color selector. struct block_id { block_id() { utils::zero_object(*this); } block_id(uint chunk_index, uint alpha_index, uint tile_index, uint block_x, uint block_y) : m_chunk_index(chunk_index), m_alpha_index((uint8)alpha_index), m_tile_index((uint8)tile_index), m_block_x((uint8)block_x), m_block_y((uint8)block_y) { } uint m_chunk_index; uint8 m_alpha_index; uint8 m_tile_index; uint8 m_block_x; uint8 m_block_y; }; typedef crnlib::vector< crnlib::vector< block_id > > chunk_blocks_using_selectors_vec; chunk_blocks_using_selectors_vec m_chunk_blocks_using_color_selectors; chunk_blocks_using_selectors_vec m_chunk_blocks_using_alpha_selectors; // second's upper 16 bits contain alpha index! crnlib::vector m_color_endpoints; // not valid until end, only for user access crnlib::vector m_alpha_endpoints; // not valid until end, only for user access // Debugging pixel_chunk_vec m_dbg_chunk_pixels; pixel_chunk_vec m_dbg_chunk_pixels_tile_vis; pixel_chunk_vec m_dbg_chunk_pixels_color_quantized; pixel_chunk_vec m_dbg_chunk_pixels_alpha_quantized; pixel_chunk_vec m_dbg_chunk_pixels_orig_color_selectors; pixel_chunk_vec m_dbg_chunk_pixels_quantized_color_selectors; pixel_chunk_vec m_dbg_chunk_pixels_final_color_selectors; pixel_chunk_vec m_dbg_chunk_pixels_orig_alpha_selectors; pixel_chunk_vec m_dbg_chunk_pixels_quantized_alpha_selectors; pixel_chunk_vec m_dbg_chunk_pixels_final_alpha_selectors; pixel_chunk_vec m_dbg_chunk_pixels_final; crn_thread_id_t m_main_thread_id; bool m_canceled; task_pool* m_pTask_pool; int m_prev_phase_index; int m_prev_percentage_complete; typedef vec<6, float> vec6F; typedef vec<16, float> vec16F; typedef tree_clusterizer vec2F_tree_vq; typedef tree_clusterizer vec6F_tree_vq; typedef tree_clusterizer vec16F_tree_vq; struct assign_color_endpoint_clusters_state { CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(assign_color_endpoint_clusters_state); assign_color_endpoint_clusters_state(vec6F_tree_vq& vq, crnlib::vector< crnlib::vector >& training_vecs) : m_vq(vq), m_training_vecs(training_vecs) { } vec6F_tree_vq& m_vq; crnlib::vector< crnlib::vector >& m_training_vecs; }; struct create_selector_codebook_state { CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(create_selector_codebook_state); create_selector_codebook_state(dxt_hc& hc, bool alpha_blocks, uint comp_index_start, uint comp_index_end, vec16F_tree_vq& selector_vq, chunk_blocks_using_selectors_vec& chunk_blocks_using_selectors, selectors_vec& selectors_cb) : m_hc(hc), m_alpha_blocks(alpha_blocks), m_comp_index_start(comp_index_start), m_comp_index_end(comp_index_end), m_selector_vq(selector_vq), m_chunk_blocks_using_selectors(chunk_blocks_using_selectors), m_selectors_cb(selectors_cb) { } dxt_hc& m_hc; bool m_alpha_blocks; uint m_comp_index_start; uint m_comp_index_end; vec16F_tree_vq& m_selector_vq; chunk_blocks_using_selectors_vec& m_chunk_blocks_using_selectors; selectors_vec& m_selectors_cb; mutable spinlock m_chunk_blocks_using_selectors_lock; }; void assign_color_endpoint_clusters_task(uint64 data, void* pData_ptr); bool determine_color_endpoint_clusters(); struct determine_alpha_endpoint_clusters_state { vec2F_tree_vq m_vq; crnlib::vector< crnlib::vector > m_training_vecs[2]; }; void determine_alpha_endpoint_clusters_task(uint64 data, void* pData_ptr); bool determine_alpha_endpoint_clusters(); void determine_color_endpoint_codebook_task(uint64 data, void* pData_ptr); bool determine_color_endpoint_codebook(); void determine_alpha_endpoint_codebook_task(uint64 data, void* pData_ptr); bool determine_alpha_endpoint_codebook(); void create_quantized_debug_images(); void create_selector_codebook_task(uint64 data, void* pData_ptr); bool create_selector_codebook(bool alpha_blocks); bool refine_quantized_color_endpoints(); bool refine_quantized_color_selectors(); bool refine_quantized_alpha_endpoints(); bool refine_quantized_alpha_selectors(); void create_final_debug_image(); bool create_chunk_encodings(); bool update_progress(uint phase_index, uint subphase_index, uint subphase_total); bool compress_internal(const params& p, uint num_chunks, const pixel_chunk* pChunks); }; CRNLIB_DEFINE_BITWISE_COPYABLE(dxt_hc::pixel_chunk); CRNLIB_DEFINE_BITWISE_COPYABLE(dxt_hc::chunk_encoding); CRNLIB_DEFINE_BITWISE_COPYABLE(dxt_hc::selectors); } // namespace crnlib