440 lines
17 KiB
C
440 lines
17 KiB
C
|
// File: crn_dxt_hc.h
|
||
|
// See Copyright Notice and license at the end of inc/crnlib.h
|
||
|
#pragma once
|
||
|
#include "crn_dxt1.h"
|
||
|
#include "crn_dxt5a.h"
|
||
|
#include "crn_dxt_endpoint_refiner.h"
|
||
|
#include "crn_image.h"
|
||
|
#include "crn_dxt.h"
|
||
|
#include "crn_image.h"
|
||
|
#include "crn_dxt_hc_common.h"
|
||
|
#include "crn_tree_clusterizer.h"
|
||
|
#include "crn_threading.h"
|
||
|
|
||
|
#define CRN_NO_FUNCTION_DEFINITIONS
|
||
|
#include "../inc/crnlib.h"
|
||
|
|
||
|
namespace crnlib
|
||
|
{
|
||
|
// Total number of compression phases (25).
// NOTE(review): presumably the denominator used when dxt_hc reports progress by phase index -- confirm against the .cpp.
const uint cTotalCompressionPhases = 25;
|
||
|
|
||
|
class dxt_hc
|
||
|
{
|
||
|
public:
|
||
|
dxt_hc();
|
||
|
~dxt_hc();
|
||
|
|
||
|
struct pixel_chunk
|
||
|
{
|
||
|
pixel_chunk() { clear(); }
|
||
|
|
||
|
dxt_pixel_block m_blocks[cChunkBlockHeight][cChunkBlockWidth];
|
||
|
|
||
|
const color_quad_u8& operator() (uint cx, uint cy) const
|
||
|
{
|
||
|
CRNLIB_ASSERT((cx < cChunkPixelWidth) && (cy < cChunkPixelHeight));
|
||
|
|
||
|
return m_blocks[cy >> cBlockPixelHeightShift][cx >> cBlockPixelWidthShift].m_pixels
|
||
|
[cy & (cBlockPixelHeight - 1)][cx & (cBlockPixelWidth - 1)];
|
||
|
}
|
||
|
|
||
|
color_quad_u8& operator() (uint cx, uint cy)
|
||
|
{
|
||
|
CRNLIB_ASSERT((cx < cChunkPixelWidth) && (cy < cChunkPixelHeight));
|
||
|
|
||
|
return m_blocks[cy >> cBlockPixelHeightShift][cx >> cBlockPixelWidthShift].m_pixels
|
||
|
[cy & (cBlockPixelHeight - 1)][cx & (cBlockPixelWidth - 1)];
|
||
|
}
|
||
|
|
||
|
inline void clear()
|
||
|
{
|
||
|
utils::zero_object(*this);
|
||
|
m_weight = 1.0f;
|
||
|
}
|
||
|
|
||
|
float m_weight;
|
||
|
};
|
||
|
|
||
|
typedef crnlib::vector<pixel_chunk> pixel_chunk_vec;
|
||
|
|
||
|
struct params
|
||
|
{
|
||
|
params() :
|
||
|
m_color_endpoint_codebook_size(3072),
|
||
|
m_color_selector_codebook_size(3072),
|
||
|
m_alpha_endpoint_codebook_size(3072),
|
||
|
m_alpha_selector_codebook_size(3072),
|
||
|
m_adaptive_tile_color_psnr_derating(2.0f), // was 3.4f
|
||
|
m_adaptive_tile_alpha_psnr_derating(2.0f),
|
||
|
m_adaptive_tile_color_alpha_weighting_ratio(3.0f),
|
||
|
m_num_levels(0),
|
||
|
m_format(cDXT1),
|
||
|
m_hierarchical(true),
|
||
|
m_perceptual(true),
|
||
|
m_debugging(false),
|
||
|
m_pProgress_func(NULL),
|
||
|
m_pProgress_func_data(NULL)
|
||
|
{
|
||
|
m_alpha_component_indices[0] = 3;
|
||
|
m_alpha_component_indices[1] = 0;
|
||
|
|
||
|
for (uint i = 0; i < cCRNMaxLevels; i++)
|
||
|
{
|
||
|
m_levels[i].m_first_chunk = 0;
|
||
|
m_levels[i].m_num_chunks = 0;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Valid range for codebook sizes: [32,8192] (non-power of two values are okay)
|
||
|
uint m_color_endpoint_codebook_size;
|
||
|
uint m_color_selector_codebook_size;
|
||
|
|
||
|
uint m_alpha_endpoint_codebook_size;
|
||
|
uint m_alpha_selector_codebook_size;
|
||
|
|
||
|
// Higher values cause fewer 8x4, 4x8, and 4x4 blocks to be utilized less often (lower quality/smaller files).
|
||
|
// Lower values cause the encoder to use large tiles less often (better quality/larger files).
|
||
|
// Valid range: [0.0,100.0].
|
||
|
// A value of 0 will cause the encoder to only use tiles larger than 4x4 if doing so would incur to quality loss.
|
||
|
float m_adaptive_tile_color_psnr_derating;
|
||
|
|
||
|
float m_adaptive_tile_alpha_psnr_derating;
|
||
|
|
||
|
float m_adaptive_tile_color_alpha_weighting_ratio;
|
||
|
|
||
|
uint m_alpha_component_indices[2];
|
||
|
|
||
|
struct miplevel_desc
|
||
|
{
|
||
|
uint m_first_chunk;
|
||
|
uint m_num_chunks;
|
||
|
};
|
||
|
// The mip level data is optional!
|
||
|
miplevel_desc m_levels[cCRNMaxLevels];
|
||
|
uint m_num_levels;
|
||
|
|
||
|
dxt_format m_format;
|
||
|
|
||
|
// If m_hierarchical is false, only 4x4 blocks will be used by the encoder (leading to higher quality/larger files).
|
||
|
bool m_hierarchical;
|
||
|
|
||
|
// If m_perceptual is true, perceptual color metrics will be used by the encoder.
|
||
|
bool m_perceptual;
|
||
|
|
||
|
bool m_debugging;
|
||
|
|
||
|
crn_progress_callback_func m_pProgress_func;
|
||
|
void* m_pProgress_func_data;
|
||
|
};
|
||
|
|
||
|
void clear();
|
||
|
|
||
|
// Main compression function
|
||
|
bool compress(const params& p, uint num_chunks, const pixel_chunk* pChunks, task_pool& task_pool);
|
||
|
|
||
|
// Output accessors
|
||
|
inline uint get_num_chunks() const { return m_num_chunks; }
|
||
|
|
||
|
struct chunk_encoding
|
||
|
{
|
||
|
chunk_encoding() { utils::zero_object(*this); };
|
||
|
|
||
|
// Index into g_chunk_encodings.
|
||
|
uint8 m_encoding_index;
|
||
|
|
||
|
// Number of tiles, endpoint indices.
|
||
|
uint8 m_num_tiles;
|
||
|
|
||
|
// Color, alpha0, alpha1
|
||
|
enum { cColorIndex = 0, cAlpha0Index = 1, cAlpha1Index = 2 };
|
||
|
uint16 m_endpoint_indices[3][cChunkMaxTiles];
|
||
|
uint16 m_selector_indices[3][cChunkBlockHeight][cChunkBlockWidth]; // [block_y][block_x]
|
||
|
};
|
||
|
|
||
|
typedef crnlib::vector<chunk_encoding> chunk_encoding_vec;
|
||
|
|
||
|
inline const chunk_encoding& get_chunk_encoding(uint chunk_index) const { return m_chunk_encoding[chunk_index]; }
|
||
|
inline const chunk_encoding_vec& get_chunk_encoding_vec() const { return m_chunk_encoding; }
|
||
|
|
||
|
struct selectors
|
||
|
{
|
||
|
selectors() { utils::zero_object(*this); }
|
||
|
|
||
|
uint8 m_selectors[cBlockPixelHeight][cBlockPixelWidth];
|
||
|
|
||
|
uint8 get_by_index(uint i) const { CRNLIB_ASSERT(i < (cBlockPixelWidth * cBlockPixelHeight)); const uint8* p = (const uint8*)m_selectors; return *(p + i); }
|
||
|
void set_by_index(uint i, uint v) { CRNLIB_ASSERT(i < (cBlockPixelWidth * cBlockPixelHeight)); uint8* p = (uint8*)m_selectors; *(p + i) = static_cast<uint8>(v); }
|
||
|
};
|
||
|
typedef crnlib::vector<selectors> selectors_vec;
|
||
|
|
||
|
// Color endpoints
|
||
|
inline uint get_color_endpoint_codebook_size() const { return m_color_endpoints.size(); }
|
||
|
inline uint get_color_endpoint(uint codebook_index) const { return m_color_endpoints[codebook_index]; }
|
||
|
const crnlib::vector<uint>& get_color_endpoint_vec() const { return m_color_endpoints; }
|
||
|
|
||
|
// Color selectors
|
||
|
uint get_color_selector_codebook_size() const { return m_color_selectors.size(); }
|
||
|
const selectors& get_color_selectors(uint codebook_index) const { return m_color_selectors[codebook_index]; }
|
||
|
const crnlib::vector<selectors>& get_color_selectors_vec() const { return m_color_selectors; }
|
||
|
|
||
|
// Alpha endpoints
|
||
|
inline uint get_alpha_endpoint_codebook_size() const { return m_alpha_endpoints.size(); }
|
||
|
inline uint get_alpha_endpoint(uint codebook_index) const { return m_alpha_endpoints[codebook_index]; }
|
||
|
const crnlib::vector<uint>& get_alpha_endpoint_vec() const { return m_alpha_endpoints; }
|
||
|
|
||
|
// Alpha selectors
|
||
|
uint get_alpha_selector_codebook_size() const { return m_alpha_selectors.size(); }
|
||
|
const selectors& get_alpha_selectors(uint codebook_index) const { return m_alpha_selectors[codebook_index]; }
|
||
|
const crnlib::vector<selectors>& get_alpha_selectors_vec() const { return m_alpha_selectors; }
|
||
|
|
||
|
// Debug images
|
||
|
const pixel_chunk_vec& get_compressed_chunk_pixels() const { return m_dbg_chunk_pixels; }
|
||
|
const pixel_chunk_vec& get_compressed_chunk_pixels_tile_vis() const { return m_dbg_chunk_pixels_tile_vis; }
|
||
|
const pixel_chunk_vec& get_compressed_chunk_pixels_color_quantized() const { return m_dbg_chunk_pixels_color_quantized; }
|
||
|
const pixel_chunk_vec& get_compressed_chunk_pixels_alpha_quantized() const { return m_dbg_chunk_pixels_alpha_quantized; }
|
||
|
const pixel_chunk_vec& get_compressed_chunk_pixels_final() const { return m_dbg_chunk_pixels_final; }
|
||
|
|
||
|
const pixel_chunk_vec& get_compressed_chunk_pixels_orig_color_selectors() const { return m_dbg_chunk_pixels_orig_color_selectors; }
|
||
|
const pixel_chunk_vec& get_compressed_chunk_pixels_quantized_color_selectors() const { return m_dbg_chunk_pixels_quantized_color_selectors; }
|
||
|
const pixel_chunk_vec& get_compressed_chunk_pixels_final_color_selectors() const { return m_dbg_chunk_pixels_final_color_selectors; }
|
||
|
|
||
|
const pixel_chunk_vec& get_compressed_chunk_pixels_orig_alpha_selectors() const { return m_dbg_chunk_pixels_orig_alpha_selectors; }
|
||
|
const pixel_chunk_vec& get_compressed_chunk_pixels_quantized_alpha_selectors() const { return m_dbg_chunk_pixels_quantized_alpha_selectors; }
|
||
|
const pixel_chunk_vec& get_compressed_chunk_pixels_final_alpha_selectors() const { return m_dbg_chunk_pixels_final_alpha_selectors; }
|
||
|
|
||
|
static void create_debug_image_from_chunks(uint num_chunks_x, uint num_chunks_y, const pixel_chunk_vec& chunks, const chunk_encoding_vec *pChunk_encodings, image_u8& img, bool serpentine_scan, int comp_index = -1);
|
||
|
|
||
|
private:
|
||
|
params m_params;
|
||
|
|
||
|
uint m_num_chunks;
|
||
|
const pixel_chunk* m_pChunks;
|
||
|
|
||
|
chunk_encoding_vec m_chunk_encoding;
|
||
|
|
||
|
uint m_num_alpha_blocks; // 0, 1, or 2
|
||
|
bool m_has_color_blocks;
|
||
|
bool m_has_alpha0_blocks;
|
||
|
bool m_has_alpha1_blocks;
|
||
|
|
||
|
struct compressed_tile
|
||
|
{
|
||
|
uint m_endpoint_cluster_index;
|
||
|
uint m_first_endpoint;
|
||
|
uint m_second_endpoint;
|
||
|
|
||
|
uint8 m_selectors[cChunkPixelWidth * cChunkPixelHeight];
|
||
|
|
||
|
void set_selector(uint x, uint y, uint s)
|
||
|
{
|
||
|
CRNLIB_ASSERT((x < m_pixel_width) && (y < m_pixel_height));
|
||
|
m_selectors[x + y * m_pixel_width] = static_cast<uint8>(s);
|
||
|
}
|
||
|
|
||
|
uint get_selector(uint x, uint y) const
|
||
|
{
|
||
|
CRNLIB_ASSERT((x < m_pixel_width) && (y < m_pixel_height));
|
||
|
return m_selectors[x + y * m_pixel_width];
|
||
|
}
|
||
|
|
||
|
uint8 m_pixel_width;
|
||
|
uint8 m_pixel_height;
|
||
|
|
||
|
uint8 m_layout_index;
|
||
|
|
||
|
bool m_alpha_encoding;
|
||
|
};
|
||
|
|
||
|
struct compressed_chunk
|
||
|
{
|
||
|
compressed_chunk() { utils::zero_object(*this); }
|
||
|
|
||
|
uint8 m_encoding_index;
|
||
|
|
||
|
uint8 m_num_tiles;
|
||
|
|
||
|
compressed_tile m_tiles[cChunkMaxTiles];
|
||
|
compressed_tile m_quantized_tiles[cChunkMaxTiles];
|
||
|
|
||
|
uint16 m_endpoint_cluster_index[cChunkMaxTiles];
|
||
|
uint16 m_selector_cluster_index[cChunkBlockHeight][cChunkBlockWidth];
|
||
|
};
|
||
|
|
||
|
typedef crnlib::vector<compressed_chunk> compressed_chunk_vec;
|
||
|
enum
|
||
|
{
|
||
|
cColorChunks = 0,
|
||
|
cAlpha0Chunks = 1,
|
||
|
cAlpha1Chunks = 2,
|
||
|
|
||
|
cNumCompressedChunkVecs = 3
|
||
|
};
|
||
|
compressed_chunk_vec m_compressed_chunks[cNumCompressedChunkVecs];
|
||
|
|
||
|
volatile atomic32_t m_encoding_hist[cNumChunkEncodings];
|
||
|
|
||
|
atomic32_t m_total_tiles;
|
||
|
|
||
|
void compress_dxt1_block(
|
||
|
dxt1_endpoint_optimizer::results& results,
|
||
|
uint chunk_index, const image_u8& chunk, uint x_ofs, uint y_ofs, uint width, uint height,
|
||
|
uint8* pSelectors);
|
||
|
|
||
|
void compress_dxt5_block(
|
||
|
dxt5_endpoint_optimizer::results& results,
|
||
|
uint chunk_index, const image_u8& chunk, uint x_ofs, uint y_ofs, uint width, uint height, uint component_index,
|
||
|
uint8* pAlpha_selectors);
|
||
|
|
||
|
void determine_compressed_chunks_task(uint64 data, void* pData_ptr);
|
||
|
bool determine_compressed_chunks();
|
||
|
|
||
|
struct tile_cluster
|
||
|
{
|
||
|
tile_cluster() : m_first_endpoint(0), m_second_endpoint(0), m_error(0), m_alpha_encoding(false) { }
|
||
|
|
||
|
// first = chunk, second = tile
|
||
|
// if an alpha tile, second's upper 16 bits contains the alpha index (0 or 1)
|
||
|
crnlib::vector< std::pair<uint, uint> > m_tiles;
|
||
|
|
||
|
uint m_first_endpoint;
|
||
|
uint m_second_endpoint;
|
||
|
uint64 m_error;
|
||
|
|
||
|
bool m_alpha_encoding;
|
||
|
};
|
||
|
|
||
|
typedef crnlib::vector<tile_cluster> tile_cluster_vec;
|
||
|
|
||
|
tile_cluster_vec m_color_clusters;
|
||
|
tile_cluster_vec m_alpha_clusters;
|
||
|
|
||
|
selectors_vec m_color_selectors;
|
||
|
selectors_vec m_alpha_selectors;
|
||
|
|
||
|
// For each selector, this array indicates every chunk/tile/tile block that use this color selector.
|
||
|
struct block_id
|
||
|
{
|
||
|
block_id() { utils::zero_object(*this); }
|
||
|
|
||
|
block_id(uint chunk_index, uint alpha_index, uint tile_index, uint block_x, uint block_y) :
|
||
|
m_chunk_index(chunk_index), m_alpha_index((uint8)alpha_index), m_tile_index((uint8)tile_index), m_block_x((uint8)block_x), m_block_y((uint8)block_y) { }
|
||
|
|
||
|
uint m_chunk_index;
|
||
|
uint8 m_alpha_index;
|
||
|
uint8 m_tile_index;
|
||
|
uint8 m_block_x;
|
||
|
uint8 m_block_y;
|
||
|
};
|
||
|
|
||
|
typedef crnlib::vector< crnlib::vector< block_id > > chunk_blocks_using_selectors_vec;
|
||
|
chunk_blocks_using_selectors_vec m_chunk_blocks_using_color_selectors;
|
||
|
chunk_blocks_using_selectors_vec m_chunk_blocks_using_alpha_selectors; // second's upper 16 bits contain alpha index!
|
||
|
|
||
|
crnlib::vector<uint> m_color_endpoints; // not valid until end, only for user access
|
||
|
crnlib::vector<uint> m_alpha_endpoints; // not valid until end, only for user access
|
||
|
|
||
|
// Debugging
|
||
|
pixel_chunk_vec m_dbg_chunk_pixels;
|
||
|
pixel_chunk_vec m_dbg_chunk_pixels_tile_vis;
|
||
|
pixel_chunk_vec m_dbg_chunk_pixels_color_quantized;
|
||
|
pixel_chunk_vec m_dbg_chunk_pixels_alpha_quantized;
|
||
|
|
||
|
pixel_chunk_vec m_dbg_chunk_pixels_orig_color_selectors;
|
||
|
pixel_chunk_vec m_dbg_chunk_pixels_quantized_color_selectors;
|
||
|
pixel_chunk_vec m_dbg_chunk_pixels_final_color_selectors;
|
||
|
|
||
|
pixel_chunk_vec m_dbg_chunk_pixels_orig_alpha_selectors;
|
||
|
pixel_chunk_vec m_dbg_chunk_pixels_quantized_alpha_selectors;
|
||
|
pixel_chunk_vec m_dbg_chunk_pixels_final_alpha_selectors;
|
||
|
|
||
|
pixel_chunk_vec m_dbg_chunk_pixels_final;
|
||
|
|
||
|
crn_thread_id_t m_main_thread_id;
|
||
|
bool m_canceled;
|
||
|
task_pool* m_pTask_pool;
|
||
|
|
||
|
int m_prev_phase_index;
|
||
|
int m_prev_percentage_complete;
|
||
|
|
||
|
typedef vec<6, float> vec6F;
|
||
|
typedef vec<16, float> vec16F;
|
||
|
typedef tree_clusterizer<vec2F> vec2F_tree_vq;
|
||
|
typedef tree_clusterizer<vec6F> vec6F_tree_vq;
|
||
|
typedef tree_clusterizer<vec16F> vec16F_tree_vq;
|
||
|
|
||
|
struct assign_color_endpoint_clusters_state
|
||
|
{
|
||
|
CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(assign_color_endpoint_clusters_state);
|
||
|
|
||
|
assign_color_endpoint_clusters_state(vec6F_tree_vq& vq, crnlib::vector< crnlib::vector<vec6F> >& training_vecs) :
|
||
|
m_vq(vq), m_training_vecs(training_vecs) { }
|
||
|
|
||
|
vec6F_tree_vq& m_vq;
|
||
|
crnlib::vector< crnlib::vector<vec6F> >& m_training_vecs;
|
||
|
};
|
||
|
|
||
|
struct create_selector_codebook_state
|
||
|
{
|
||
|
CRNLIB_NO_COPY_OR_ASSIGNMENT_OP(create_selector_codebook_state);
|
||
|
|
||
|
create_selector_codebook_state(dxt_hc& hc, bool alpha_blocks, uint comp_index_start, uint comp_index_end, vec16F_tree_vq& selector_vq, chunk_blocks_using_selectors_vec& chunk_blocks_using_selectors, selectors_vec& selectors_cb) :
|
||
|
m_hc(hc),
|
||
|
m_alpha_blocks(alpha_blocks),
|
||
|
m_comp_index_start(comp_index_start),
|
||
|
m_comp_index_end(comp_index_end),
|
||
|
m_selector_vq(selector_vq),
|
||
|
m_chunk_blocks_using_selectors(chunk_blocks_using_selectors),
|
||
|
m_selectors_cb(selectors_cb)
|
||
|
{
|
||
|
}
|
||
|
|
||
|
dxt_hc& m_hc;
|
||
|
bool m_alpha_blocks;
|
||
|
uint m_comp_index_start;
|
||
|
uint m_comp_index_end;
|
||
|
vec16F_tree_vq& m_selector_vq;
|
||
|
chunk_blocks_using_selectors_vec& m_chunk_blocks_using_selectors;
|
||
|
selectors_vec& m_selectors_cb;
|
||
|
|
||
|
mutable spinlock m_chunk_blocks_using_selectors_lock;
|
||
|
};
|
||
|
|
||
|
void assign_color_endpoint_clusters_task(uint64 data, void* pData_ptr);
|
||
|
bool determine_color_endpoint_clusters();
|
||
|
|
||
|
struct determine_alpha_endpoint_clusters_state
|
||
|
{
|
||
|
vec2F_tree_vq m_vq;
|
||
|
crnlib::vector< crnlib::vector<vec2F> > m_training_vecs[2];
|
||
|
};
|
||
|
|
||
|
void determine_alpha_endpoint_clusters_task(uint64 data, void* pData_ptr);
|
||
|
bool determine_alpha_endpoint_clusters();
|
||
|
|
||
|
void determine_color_endpoint_codebook_task(uint64 data, void* pData_ptr);
|
||
|
bool determine_color_endpoint_codebook();
|
||
|
|
||
|
void determine_alpha_endpoint_codebook_task(uint64 data, void* pData_ptr);
|
||
|
bool determine_alpha_endpoint_codebook();
|
||
|
|
||
|
void create_quantized_debug_images();
|
||
|
|
||
|
void create_selector_codebook_task(uint64 data, void* pData_ptr);
|
||
|
bool create_selector_codebook(bool alpha_blocks);
|
||
|
|
||
|
bool refine_quantized_color_endpoints();
|
||
|
bool refine_quantized_color_selectors();
|
||
|
bool refine_quantized_alpha_endpoints();
|
||
|
bool refine_quantized_alpha_selectors();
|
||
|
void create_final_debug_image();
|
||
|
bool create_chunk_encodings();
|
||
|
bool update_progress(uint phase_index, uint subphase_index, uint subphase_total);
|
||
|
bool compress_internal(const params& p, uint num_chunks, const pixel_chunk* pChunks);
|
||
|
};
|
||
|
|
||
|
// Marks dxt_hc::pixel_chunk as bitwise-copyable.
// NOTE(review): the macro is defined elsewhere; presumably it lets crnlib containers relocate elements with raw memory copies -- confirm.
CRNLIB_DEFINE_BITWISE_COPYABLE(dxt_hc::pixel_chunk);
|
||
|
// Marks dxt_hc::chunk_encoding as bitwise-copyable.
// NOTE(review): the macro is defined elsewhere; presumably it lets crnlib containers relocate elements with raw memory copies -- confirm.
CRNLIB_DEFINE_BITWISE_COPYABLE(dxt_hc::chunk_encoding);
|
||
|
// Marks dxt_hc::selectors as bitwise-copyable.
// NOTE(review): the macro is defined elsewhere; presumably it lets crnlib containers relocate elements with raw memory copies -- confirm.
CRNLIB_DEFINE_BITWISE_COPYABLE(dxt_hc::selectors);
|
||
|
|
||
|
} // namespace crnlib
|