CompressHelpers: Support XZ compression/decompression
This commit is contained in:
parent
19a0854528
commit
3a76485e4b
|
@ -77,7 +77,7 @@ target_precompile_headers(util PRIVATE "pch.h")
|
|||
target_include_directories(util PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/..")
|
||||
target_include_directories(util PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/..")
|
||||
target_link_libraries(util PUBLIC common simpleini imgui)
|
||||
target_link_libraries(util PRIVATE libchdr JPEG::JPEG PNG::PNG WebP::libwebp lunasvg::lunasvg ZLIB::ZLIB SoundTouch::SoundTouchDLL xxhash Zstd::Zstd reshadefx)
|
||||
target_link_libraries(util PRIVATE libchdr lzma JPEG::JPEG PNG::PNG WebP::libwebp lunasvg::lunasvg ZLIB::ZLIB SoundTouch::SoundTouchDLL xxhash Zstd::Zstd reshadefx)
|
||||
|
||||
if(ENABLE_X11)
|
||||
target_compile_definitions(util PRIVATE "-DENABLE_X11=1")
|
||||
|
|
|
@ -3,15 +3,26 @@
|
|||
|
||||
#include "compress_helpers.h"
|
||||
|
||||
#include "common/align.h"
|
||||
#include "common/assert.h"
|
||||
#include "common/error.h"
|
||||
#include "common/file_system.h"
|
||||
#include "common/log.h"
|
||||
#include "common/path.h"
|
||||
#include "common/scoped_guard.h"
|
||||
#include "common/string_util.h"
|
||||
|
||||
#include "7zCrc.h"
|
||||
#include "Alloc.h"
|
||||
#include "Xz.h"
|
||||
#include "XzCrc64.h"
|
||||
#include "XzEnc.h"
|
||||
|
||||
#include <zstd.h>
|
||||
#include <zstd_errors.h>
|
||||
|
||||
LOG_CHANNEL(CompressHelpers);
|
||||
|
||||
// TODO: Use the streaming API to avoid allocating the whole input buffer, although a single read() call is probably still faster.
|
||||
|
||||
namespace CompressHelpers {
|
||||
|
@ -23,13 +34,284 @@ static bool DecompressHelper(OptionalByteBuffer& ret, CompressType type, T data,
|
|||
|
||||
template<typename T>
|
||||
static bool CompressHelper(OptionalByteBuffer& ret, CompressType type, T data, int clevel, Error* error);
|
||||
|
||||
static void Init7ZCRCTables();
|
||||
static bool XzCompress(OptionalByteBuffer& ret, const u8* data, size_t data_size, int clevel, Error* error);
|
||||
static bool XzDecompress(OptionalByteBuffer& ret, const u8* data, size_t data_size, Error* error);
|
||||
|
||||
static std::once_flag s_lzma_crc_table_init;
|
||||
} // namespace CompressHelpers
|
||||
|
||||
void CompressHelpers::Init7ZCRCTables()
|
||||
{
|
||||
std::call_once(s_lzma_crc_table_init, []() {
|
||||
CrcGenerateTable();
|
||||
Crc64GenerateTable();
|
||||
});
|
||||
}
|
||||
|
||||
const char* CompressHelpers::SZErrorToString(int res)
|
||||
{
|
||||
// clang-format off
|
||||
switch (res)
|
||||
{
|
||||
case SZ_OK: return "SZ_OK";
|
||||
case SZ_ERROR_DATA: return "SZ_ERROR_DATA";
|
||||
case SZ_ERROR_MEM: return "SZ_ERROR_MEM";
|
||||
case SZ_ERROR_CRC: return "SZ_ERROR_CRC";
|
||||
case SZ_ERROR_UNSUPPORTED: return "SZ_ERROR_UNSUPPORTED";
|
||||
case SZ_ERROR_PARAM: return "SZ_ERROR_PARAM";
|
||||
case SZ_ERROR_INPUT_EOF: return "SZ_ERROR_INPUT_EOF";
|
||||
case SZ_ERROR_OUTPUT_EOF: return "SZ_ERROR_OUTPUT_EOF";
|
||||
case SZ_ERROR_READ: return "SZ_ERROR_READ";
|
||||
case SZ_ERROR_WRITE: return "SZ_ERROR_WRITE";
|
||||
case SZ_ERROR_PROGRESS: return "SZ_ERROR_PROGRESS";
|
||||
case SZ_ERROR_FAIL: return "SZ_ERROR_FAIL";
|
||||
case SZ_ERROR_THREAD: return "SZ_ERROR_THREAD";
|
||||
case SZ_ERROR_ARCHIVE: return "SZ_ERROR_ARCHIVE";
|
||||
case SZ_ERROR_NO_ARCHIVE: return "SZ_ERROR_NO_ARCHIVE";
|
||||
default: return "SZ_UNKNOWN";
|
||||
}
|
||||
// clang-format on
|
||||
}
|
||||
|
||||
// Compresses [data, data + data_size) into an XZ container held in memory.
//
// ret:    receives the compressed bytes on success; untouched on failure.
// clevel: requested compression level, clamped to the range [1, 9].
// error:  receives a description when Xz_Encode() reports a failure.
//
// Returns true on success, false if the encoder returned anything but SZ_OK.
bool CompressHelpers::XzCompress(OptionalByteBuffer& ret, const u8* data, size_t data_size, int clevel, Error* error)
{
  Init7ZCRCTables();

  // Sequential input stream that serves the encoder straight from the caller's buffer.
  struct MemoryInStream
  {
    ISeqInStream vt;
    const u8* buffer;
    size_t buffer_size;
    size_t read_pos;
  };

  const auto read_callback = [](const ISeqInStream* p, void* buf, size_t* size) -> SRes {
    MemoryInStream* self = Z7_CONTAINER_FROM_VTBL(p, MemoryInStream, vt);
    const size_t remaining = self->buffer_size - self->read_pos;
    const size_t count = std::min(remaining, *size);

    std::memcpy(buf, &self->buffer[self->read_pos], count);
    self->read_pos += count;

    // Report how much was actually delivered; zero signals end of input.
    *size = count;
    return SZ_OK;
  };

  MemoryInStream source = {.vt = {.Read = read_callback}, .buffer = data, .buffer_size = data_size, .read_pos = 0};

  // Sequential output stream that appends to a growable array.
  // Each chunk the encoder hands over is copied into that array.
  struct DumpOutStream
  {
    ISeqOutStream vt;
    DynamicHeapArray<u8> out_data;
    size_t out_pos;
  };

  const auto write_callback = [](const ISeqOutStream* p, const void* buf, size_t size) -> size_t {
    DumpOutStream* self = Z7_CONTAINER_FROM_VTBL(p, DumpOutStream, vt);
    const size_t required = self->out_pos + size;

    // Grow to at least double the current size so repeated writes do not resize every time.
    if (required > self->out_data.size())
      self->out_data.resize(std::max(required, self->out_data.size() * 2));

    std::memcpy(&self->out_data[self->out_pos], buf, size);
    self->out_pos += size;
    return size;
  };

  // Start with half of the input size as the initial output capacity.
  DumpOutStream sink = {
    .vt = {.Write = write_callback}, .out_data = DynamicHeapArray<u8>(data_size / 2), .out_pos = 0};

  CXzProps encoder_props;
  XzProps_Init(&encoder_props);
  encoder_props.lzma2Props.lzmaProps.level = std::clamp(clevel, 1, 9);

  const SRes res = Xz_Encode(&sink.vt, &source.vt, &encoder_props, nullptr);
  if (res == SZ_OK)
  {
    // Trim the unused tail before handing the buffer to the caller.
    sink.out_data.resize(sink.out_pos);
    ret = OptionalByteBuffer(std::move(sink.out_data));
    return true;
  }

  Error::SetStringFmt(error, "Xz_Encode() failed: {} ({})", SZErrorToString(res), static_cast<int>(res));
  return false;
}
|
||||
|
||||
// Decompresses an XZ container held in memory into a newly allocated buffer.
//
// ret:   receives the decompressed bytes on success; untouched on failure.
// data:  start of the XZ container, data_size bytes long.
// error: receives a description of the first failure encountered.
//
// The stream/block sizes are first read with Xzs_ReadBackward() starting from the
// end of the buffer, which gives the total unpacked size up front. Each block is
// then decoded directly into its slice of the output buffer.
//
// Returns true on success. Returns false if the sizes cannot be read, the container
// has no blocks or unpacks to zero bytes, a block fails to decode, or fewer bytes
// than announced were produced.
bool CompressHelpers::XzDecompress(OptionalByteBuffer& ret, const u8* data, size_t data_size, Error* error)
{
  static constexpr size_t kInputBufSize = static_cast<size_t>(1) << 18;

  Init7ZCRCTables();

  // Seekable input stream over the caller's buffer, used while reading the block sizes.
  struct MyInStream
  {
    ISeekInStream vt;
    const u8* data;
    size_t data_size;
    size_t data_pos;
  };

  MyInStream mis = {.vt = {.Read = [](const ISeekInStream* p, void* buf, size_t* size) -> SRes {
                             MyInStream* mis = Z7_CONTAINER_FROM_VTBL(p, MyInStream, vt);
                             const size_t size_to_read = *size;
                             const size_t size_to_copy = std::min(size_to_read, mis->data_size - mis->data_pos);
                             std::memcpy(buf, &mis->data[mis->data_pos], size_to_copy);
                             mis->data_pos += size_to_copy;

                             // Always report the number of bytes actually copied, so the caller never
                             // treats the uninitialised tail of buf as valid data after a short read.
                             *size = size_to_copy;
                             return (size_to_copy == size_to_read) ? SZ_OK : SZ_ERROR_READ;
                           },
                           .Seek = [](const ISeekInStream* p, Int64* pos, ESzSeek origin) -> SRes {
                             MyInStream* mis = Z7_CONTAINER_FROM_VTBL(p, MyInStream, vt);
                             static_assert(SZ_SEEK_CUR == SEEK_CUR && SZ_SEEK_SET == SEEK_SET &&
                                           SZ_SEEK_END == SEEK_END);
                             if (origin == SZ_SEEK_SET)
                             {
                               if (*pos < 0 || static_cast<size_t>(*pos) > mis->data_size)
                                 return SZ_ERROR_READ;
                               mis->data_pos = static_cast<size_t>(*pos);
                               return SZ_OK;
                             }
                             else if (origin == SZ_SEEK_END)
                             {
                               // The incoming offset is ignored; this always seeks to the very end.
                               mis->data_pos = mis->data_size;
                               *pos = static_cast<s64>(mis->data_pos);
                               return SZ_OK;
                             }
                             else if (origin == SZ_SEEK_CUR)
                             {
                               const s64 new_pos = static_cast<s64>(mis->data_pos) + *pos;
                               if (new_pos < 0 || static_cast<size_t>(new_pos) > mis->data_size)
                                 return SZ_ERROR_READ;
                               mis->data_pos = static_cast<size_t>(new_pos);
                               *pos = new_pos;
                               return SZ_OK;
                             }
                             else
                             {
                               return SZ_ERROR_READ;
                             }
                           }},
                    .data = data,
                    .data_size = data_size,
                    .data_pos = 0};

  // Buffered look-stream wrapper required by Xzs_ReadBackward().
  CLookToRead2 look_stream = {};
  LookToRead2_INIT(&look_stream);
  LookToRead2_CreateVTable(&look_stream, False);
  look_stream.realStream = &mis.vt;
  look_stream.bufSize = kInputBufSize;
  look_stream.buf = static_cast<Byte*>(ISzAlloc_Alloc(&g_Alloc, kInputBufSize));
  if (!look_stream.buf)
  {
    Error::SetString(error, "Failed to allocate lookahead buffer");
    return false;
  }
  const ScopedGuard guard = [&look_stream]() {
    if (look_stream.buf)
      ISzAlloc_Free(&g_Alloc, look_stream.buf);
  };

  // Read blocks
  CXzs xzs;
  Xzs_Construct(&xzs);
  const ScopedGuard xzs_guard([&xzs]() { Xzs_Free(&xzs, &g_Alloc); });

  Int64 start_pos = static_cast<Int64>(data_size);
  SRes res = Xzs_ReadBackward(&xzs, &look_stream.vt, &start_pos, nullptr, &g_Alloc);
  if (res != SZ_OK)
  {
    Error::SetStringFmt(error, "Xzs_ReadBackward() failed: {} ({})", SZErrorToString(res), res);
    return false;
  }

  const size_t num_blocks = Xzs_GetNumBlocks(&xzs);
  if (num_blocks == 0)
  {
    Error::SetString(error, "Stream has no blocks.");
    return false;
  }

  // compute output size
  size_t stream_size = 0;
  for (int sn = static_cast<int>(xzs.num - 1); sn >= 0; sn--)
  {
    const CXzStream& stream = xzs.streams[sn];
    for (size_t bn = 0; bn < stream.numBlocks; bn++)
    {
      const CXzBlockSizes& block = stream.blocks[bn];
      stream_size += block.unpackSize;
    }
  }

  if (stream_size == 0)
  {
    Error::SetString(error, "Stream is empty.");
    return false;
  }

  ByteBuffer out_buffer(stream_size);
  size_t out_pos = 0;

  CXzUnpacker unpacker = {};
  XzUnpacker_Construct(&unpacker, &g_Alloc);

  // The unpacker may allocate decoder state through g_Alloc while decoding.
  // Release it on every exit path, including the early error returns below.
  const ScopedGuard unpacker_guard([&unpacker]() { XzUnpacker_Free(&unpacker); });

  // Streams are walked from the last array entry to the first, matching the size pass above.
  for (int sn = static_cast<int>(xzs.num - 1); sn >= 0; sn--)
  {
    const CXzStream& stream = xzs.streams[sn];
    size_t src_offset = stream.startOffset + XZ_STREAM_HEADER_SIZE;
    if (src_offset >= data_size)
      break;

    for (size_t bn = 0; bn < stream.numBlocks; bn++)
    {
      const CXzBlockSizes& block = stream.blocks[bn];

      // Reset the decoder for this block and point it at the remaining output space.
      XzUnpacker_Init(&unpacker);
      unpacker.streamFlags = stream.flags;
      XzUnpacker_PrepareToRandomBlockDecoding(&unpacker);
      XzUnpacker_SetOutBuf(&unpacker, &out_buffer[out_pos], out_buffer.size() - out_pos);

      // Never offer the decoder more input than remains in the source buffer.
      const size_t orig_compressed_size =
        std::min<size_t>(Common::AlignUpPow2(block.totalSize, 4),
                         static_cast<size_t>(data_size - src_offset)); // LZMA blocks are 4 byte aligned?;

      // In/out parameters: on return they hold the number of bytes produced/consumed.
      SizeT block_uncompressed_size = block.unpackSize;
      SizeT block_compressed_size = orig_compressed_size;

      ECoderStatus status;
      res = XzUnpacker_Code(&unpacker, nullptr, &block_uncompressed_size, &data[src_offset], &block_compressed_size,
                            true, CODER_FINISH_END, &status);
      if (res != SZ_OK || status != CODER_STATUS_FINISHED_WITH_MARK) [[unlikely]]
      {
        Error::SetStringFmt(error, "XzUnpacker_Code() failed: {} ({}) (status {})", SZErrorToString(res), res,
                            static_cast<unsigned>(status));
        return false;
      }

      // A mismatch is only logged; the final out_pos check below decides success.
      if (block_compressed_size != orig_compressed_size || block_uncompressed_size != block.unpackSize)
      {
        WARNING_LOG("Decompress size mismatch: {}/{} vs {}/{}", block_compressed_size, block_uncompressed_size,
                    orig_compressed_size, block.unpackSize);
      }

      out_pos += block_uncompressed_size;
      src_offset += block_compressed_size;
    }
  }

  if (out_pos != out_buffer.size())
  {
    Error::SetStringFmt(error, "Only decompressed {} of {} bytes", out_pos, out_buffer.size());
    return false;
  }

  ret = std::move(out_buffer);
  return true;
}
|
||||
|
||||
std::optional<CompressHelpers::CompressType> CompressHelpers::GetCompressType(const std::string_view path, Error* error)
|
||||
{
|
||||
const std::string_view extension = Path::GetExtension(path);
|
||||
if (StringUtil::EqualNoCase(extension, "zst"))
|
||||
return CompressType::Zstandard;
|
||||
else if (StringUtil::EqualNoCase(extension, "xz"))
|
||||
return CompressType::XZ;
|
||||
|
||||
return CompressType::Uncompressed;
|
||||
}
|
||||
|
@ -92,7 +374,11 @@ bool CompressHelpers::DecompressHelper(CompressHelpers::OptionalByteBuffer& ret,
|
|||
|
||||
return true;
|
||||
}
|
||||
break;
|
||||
|
||||
case CompressType::XZ:
|
||||
{
|
||||
return XzDecompress(ret, data.data(), data.size(), error);
|
||||
}
|
||||
|
||||
DefaultCaseIsUnreachable()
|
||||
}
|
||||
|
@ -139,6 +425,11 @@ bool CompressHelpers::CompressHelper(OptionalByteBuffer& ret, CompressType type,
|
|||
return true;
|
||||
}
|
||||
|
||||
case CompressType::XZ:
|
||||
{
|
||||
return XzCompress(ret, data.data(), data.size(), clevel, error);
|
||||
}
|
||||
|
||||
DefaultCaseIsUnreachable()
|
||||
}
|
||||
}
|
||||
|
|
|
@ -15,6 +15,7 @@ enum class CompressType
|
|||
{
|
||||
Uncompressed,
|
||||
Zstandard,
|
||||
XZ,
|
||||
Count
|
||||
};
|
||||
|
||||
|
@ -44,4 +45,7 @@ bool CompressToFile(const char* path, std::span<const u8> data, int clevel = -1,
|
|||
Error* error = nullptr);
|
||||
bool CompressToFile(CompressType type, const char* path, std::span<const u8> data, int clevel = -1,
|
||||
bool atomic_write = true, Error* error = nullptr);
|
||||
|
||||
const char* SZErrorToString(int res);
|
||||
|
||||
} // namespace CompressHelpers
|
||||
|
|
|
@ -217,6 +217,9 @@
|
|||
<ProjectReference Include="..\..\dep\libchdr\libchdr.vcxproj">
|
||||
<Project>{425d6c99-d1c8-43c2-b8ac-4d7b1d941017}</Project>
|
||||
</ProjectReference>
|
||||
<ProjectReference Include="..\..\dep\lzma\lzma.vcxproj">
|
||||
<Project>{dd944834-7899-4c1c-a4c1-064b5009d239}</Project>
|
||||
</ProjectReference>
|
||||
<ProjectReference Include="..\..\dep\reshadefx\reshadefx.vcxproj">
|
||||
<Project>{27b8d4bb-4f01-4432-bc14-9bf6ca458eee}</Project>
|
||||
</ProjectReference>
|
||||
|
@ -250,7 +253,7 @@
|
|||
<Import Project="util.props" />
|
||||
<ItemDefinitionGroup>
|
||||
<ClCompile>
|
||||
<AdditionalIncludeDirectories>%(AdditionalIncludeDirectories);$(SolutionDir)dep\reshadefx\include</AdditionalIncludeDirectories>
|
||||
<AdditionalIncludeDirectories>%(AdditionalIncludeDirectories);$(SolutionDir)dep\reshadefx\include;$(SolutionDir)dep\lzma\include</AdditionalIncludeDirectories>
|
||||
<ObjectFileName>$(IntDir)/%(RelativeDir)/</ObjectFileName>
|
||||
<PrecompiledHeader>Use</PrecompiledHeader>
|
||||
<PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>
|
||||
|
|
Loading…
Reference in New Issue