CompressHelpers: Support XZ compression/decompression

This commit is contained in:
Stenzek 2024-10-22 19:40:02 +10:00
parent 19a0854528
commit 3a76485e4b
No known key found for this signature in database
4 changed files with 301 additions and 3 deletions

View File

@ -77,7 +77,7 @@ target_precompile_headers(util PRIVATE "pch.h")
target_include_directories(util PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/..") target_include_directories(util PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/..")
target_include_directories(util PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/..") target_include_directories(util PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/..")
target_link_libraries(util PUBLIC common simpleini imgui) target_link_libraries(util PUBLIC common simpleini imgui)
target_link_libraries(util PRIVATE libchdr JPEG::JPEG PNG::PNG WebP::libwebp lunasvg::lunasvg ZLIB::ZLIB SoundTouch::SoundTouchDLL xxhash Zstd::Zstd reshadefx) target_link_libraries(util PRIVATE libchdr lzma JPEG::JPEG PNG::PNG WebP::libwebp lunasvg::lunasvg ZLIB::ZLIB SoundTouch::SoundTouchDLL xxhash Zstd::Zstd reshadefx)
if(ENABLE_X11) if(ENABLE_X11)
target_compile_definitions(util PRIVATE "-DENABLE_X11=1") target_compile_definitions(util PRIVATE "-DENABLE_X11=1")

View File

@ -3,15 +3,26 @@
#include "compress_helpers.h" #include "compress_helpers.h"
#include "common/align.h"
#include "common/assert.h" #include "common/assert.h"
#include "common/error.h" #include "common/error.h"
#include "common/file_system.h" #include "common/file_system.h"
#include "common/log.h"
#include "common/path.h" #include "common/path.h"
#include "common/scoped_guard.h"
#include "common/string_util.h" #include "common/string_util.h"
#include "7zCrc.h"
#include "Alloc.h"
#include "Xz.h"
#include "XzCrc64.h"
#include "XzEnc.h"
#include <zstd.h> #include <zstd.h>
#include <zstd_errors.h> #include <zstd_errors.h>
LOG_CHANNEL(CompressHelpers);
// TODO: Use streaming API to avoid mallocing the whole input buffer. But one read() call is probably still faster.. // TODO: Use streaming API to avoid mallocing the whole input buffer. But one read() call is probably still faster..
namespace CompressHelpers { namespace CompressHelpers {
@ -23,13 +34,284 @@ static bool DecompressHelper(OptionalByteBuffer& ret, CompressType type, T data,
template<typename T> template<typename T>
static bool CompressHelper(OptionalByteBuffer& ret, CompressType type, T data, int clevel, Error* error); static bool CompressHelper(OptionalByteBuffer& ret, CompressType type, T data, int clevel, Error* error);
static void Init7ZCRCTables();
static bool XzCompress(OptionalByteBuffer& ret, const u8* data, size_t data_size, int clevel, Error* error);
static bool XzDecompress(OptionalByteBuffer& ret, const u8* data, size_t data_size, Error* error);
static std::once_flag s_lzma_crc_table_init;
} // namespace CompressHelpers } // namespace CompressHelpers
// Builds the CRC32 and CRC64 lookup tables used by the LZMA SDK.
// Guarded by s_lzma_crc_table_init, so the generators run at most once no matter how often this is called.
void CompressHelpers::Init7ZCRCTables()
{
  const auto generate_tables = []() {
    CrcGenerateTable();
    Crc64GenerateTable();
  };

  std::call_once(s_lzma_crc_table_init, generate_tables);
}
// Returns the symbolic name of an LZMA SDK result code (the SZ_* constants).
// Codes that are not in the table below yield "SZ_UNKNOWN". The returned pointer refers to a string literal.
const char* CompressHelpers::SZErrorToString(int res)
{
  struct CodeName
  {
    int code;
    const char* name;
  };

  // clang-format off
  static constexpr CodeName s_code_names[] = {
    {SZ_OK,                "SZ_OK"},
    {SZ_ERROR_DATA,        "SZ_ERROR_DATA"},
    {SZ_ERROR_MEM,         "SZ_ERROR_MEM"},
    {SZ_ERROR_CRC,         "SZ_ERROR_CRC"},
    {SZ_ERROR_UNSUPPORTED, "SZ_ERROR_UNSUPPORTED"},
    {SZ_ERROR_PARAM,       "SZ_ERROR_PARAM"},
    {SZ_ERROR_INPUT_EOF,   "SZ_ERROR_INPUT_EOF"},
    {SZ_ERROR_OUTPUT_EOF,  "SZ_ERROR_OUTPUT_EOF"},
    {SZ_ERROR_READ,        "SZ_ERROR_READ"},
    {SZ_ERROR_WRITE,       "SZ_ERROR_WRITE"},
    {SZ_ERROR_PROGRESS,    "SZ_ERROR_PROGRESS"},
    {SZ_ERROR_FAIL,        "SZ_ERROR_FAIL"},
    {SZ_ERROR_THREAD,      "SZ_ERROR_THREAD"},
    {SZ_ERROR_ARCHIVE,     "SZ_ERROR_ARCHIVE"},
    {SZ_ERROR_NO_ARCHIVE,  "SZ_ERROR_NO_ARCHIVE"},
  };
  // clang-format on

  for (const CodeName& entry : s_code_names)
  {
    if (entry.code == res)
      return entry.name;
  }

  return "SZ_UNKNOWN";
}
// Compresses a memory buffer into an in-memory XZ container using Xz_Encode().
//   ret       - receives the compressed bytes on success; not modified on failure.
//   data      - input bytes, data_size long.
//   clevel    - requested compression level; clamped to the range 1..9.
//   error     - receives a description when the function returns false.
// Returns true if Xz_Encode() reported SZ_OK.
bool CompressHelpers::XzCompress(OptionalByteBuffer& ret, const u8* data, size_t data_size, int clevel, Error* error)
{
  Init7ZCRCTables();

  // Sequential reader that hands out the caller's buffer piece by piece.
  struct MemoryInStream
  {
    ISeqInStream vt;
    const u8* buffer;
    size_t buffer_size;
    size_t read_pos;
  };
  const auto read_callback = [](const ISeqInStream* p, void* buf, size_t* size) -> SRes {
    MemoryInStream* const src = Z7_CONTAINER_FROM_VTBL(p, MemoryInStream, vt);
    const size_t remaining = src->buffer_size - src->read_pos;
    const size_t count = (*size < remaining) ? *size : remaining;
    std::memcpy(buf, src->buffer + src->read_pos, count);
    src->read_pos += count;

    // A count of zero is how the end of the input is reported to the encoder.
    *size = count;
    return SZ_OK;
  };
  MemoryInStream mis = {{.Read = read_callback}, data, data_size, 0};

  // Growable sink for the encoder output. The array is trimmed to out_pos once encoding has finished.
  struct DumpOutStream
  {
    ISeqOutStream vt;
    DynamicHeapArray<u8> out_data;
    size_t out_pos;
  };
  const auto write_callback = [](const ISeqOutStream* p, const void* buf, size_t size) -> size_t {
    DumpOutStream* const sink = Z7_CONTAINER_FROM_VTBL(p, DumpOutStream, vt);
    const size_t end_pos = sink->out_pos + size;
    if (end_pos > sink->out_data.size())
    {
      // Grow to at least double the current size so repeated small writes do not reallocate every time.
      const size_t doubled = sink->out_data.size() * 2;
      sink->out_data.resize((end_pos > doubled) ? end_pos : doubled);
    }

    std::memcpy(&sink->out_data[sink->out_pos], buf, size);
    sink->out_pos = end_pos;
    return size;
  };
  // Start with half of the input size as the initial guess for the output size.
  DumpOutStream dos = {.vt = {.Write = write_callback},
                       .out_data = DynamicHeapArray<u8>(data_size / 2),
                       .out_pos = 0};

  CXzProps props;
  XzProps_Init(&props);
  props.lzma2Props.lzmaProps.level = std::clamp(clevel, 1, 9);

  const SRes res = Xz_Encode(&dos.vt, &mis.vt, &props, nullptr);
  if (res == SZ_OK)
  {
    // Drop the unused tail of the growth buffer before handing it to the caller.
    dos.out_data.resize(dos.out_pos);
    ret = OptionalByteBuffer(std::move(dos.out_data));
    return true;
  }

  Error::SetStringFmt(error, "Xz_Encode() failed: {} ({})", SZErrorToString(res), static_cast<int>(res));
  return false;
}
// Decompresses a complete in-memory XZ container.
//   ret       - receives the decompressed bytes on success; not modified on failure.
//   data      - the XZ file contents, data_size bytes long.
//   error     - receives a description when the function returns false.
// The block size records are read first (Xzs_ReadBackward), the total unpacked size is summed from them,
// and then every block is decoded directly into its slot of a single output buffer.
// Returns true only if every block decoded cleanly and the output buffer was filled exactly.
bool CompressHelpers::XzDecompress(OptionalByteBuffer& ret, const u8* data, size_t data_size, Error* error)
{
  static constexpr size_t kInputBufSize = static_cast<size_t>(1) << 18;

  Init7ZCRCTables();

  // Seekable, read-only view of the caller's buffer exposed through the SDK's ISeekInStream vtable.
  struct MyInStream
  {
    ISeekInStream vt;
    const u8* data;
    size_t data_size;
    size_t data_pos;
  };
  MyInStream mis = {
    .vt = {.Read = [](const ISeekInStream* p, void* buf, size_t* size) -> SRes {
             MyInStream* self = Z7_CONTAINER_FROM_VTBL(p, MyInStream, vt);
             const size_t size_to_read = *size;
             const size_t size_to_copy = std::min(size_to_read, self->data_size - self->data_pos);
             std::memcpy(buf, &self->data[self->data_pos], size_to_copy);
             self->data_pos += size_to_copy;

             // Report how many bytes were actually produced, so the caller never assumes more valid data than
             // was copied. Short reads are still treated as an error here.
             *size = size_to_copy;
             return (size_to_copy == size_to_read) ? SZ_OK : SZ_ERROR_READ;
           },
           .Seek = [](const ISeekInStream* p, Int64* pos, ESzSeek origin) -> SRes {
             MyInStream* self = Z7_CONTAINER_FROM_VTBL(p, MyInStream, vt);
             static_assert(SZ_SEEK_CUR == SEEK_CUR && SZ_SEEK_SET == SEEK_SET && SZ_SEEK_END == SEEK_END);
             if (origin == SZ_SEEK_SET)
             {
               if (*pos < 0 || static_cast<size_t>(*pos) > self->data_size)
                 return SZ_ERROR_READ;

               self->data_pos = static_cast<size_t>(*pos);
               return SZ_OK;
             }
             else if (origin == SZ_SEEK_END)
             {
               // Honour the requested offset relative to the end instead of silently discarding it.
               const s64 new_pos = static_cast<s64>(self->data_size) + *pos;
               if (new_pos < 0 || static_cast<size_t>(new_pos) > self->data_size)
                 return SZ_ERROR_READ;

               self->data_pos = static_cast<size_t>(new_pos);
               *pos = new_pos;
               return SZ_OK;
             }
             else if (origin == SZ_SEEK_CUR)
             {
               const s64 new_pos = static_cast<s64>(self->data_pos) + *pos;
               if (new_pos < 0 || static_cast<size_t>(new_pos) > self->data_size)
                 return SZ_ERROR_READ;

               self->data_pos = static_cast<size_t>(new_pos);
               *pos = new_pos;
               return SZ_OK;
             }
             else
             {
               return SZ_ERROR_READ;
             }
           }},
    .data = data,
    .data_size = data_size,
    .data_pos = 0};

  // Xzs_ReadBackward() wants an ILookInStream, so wrap the seekable stream in the SDK's buffered adapter.
  CLookToRead2 look_stream = {};
  LookToRead2_INIT(&look_stream);
  LookToRead2_CreateVTable(&look_stream, False);
  look_stream.realStream = &mis.vt;
  look_stream.bufSize = kInputBufSize;
  look_stream.buf = static_cast<Byte*>(ISzAlloc_Alloc(&g_Alloc, kInputBufSize));
  if (!look_stream.buf)
  {
    Error::SetString(error, "Failed to allocate lookahead buffer");
    return false;
  }
  const ScopedGuard guard = [&look_stream]() {
    if (look_stream.buf)
      ISzAlloc_Free(&g_Alloc, look_stream.buf);
  };

  // Read blocks
  CXzs xzs;
  Xzs_Construct(&xzs);
  const ScopedGuard xzs_guard([&xzs]() { Xzs_Free(&xzs, &g_Alloc); });

  // The scan starts from the end of the input.
  Int64 start_pos = static_cast<Int64>(data_size);
  SRes res = Xzs_ReadBackward(&xzs, &look_stream.vt, &start_pos, nullptr, &g_Alloc);
  if (res != SZ_OK)
  {
    Error::SetStringFmt(error, "Xzs_ReadBackward() failed: {} ({})", SZErrorToString(res), res);
    return false;
  }

  const size_t num_blocks = Xzs_GetNumBlocks(&xzs);
  if (num_blocks == 0)
  {
    Error::SetString(error, "Stream has no blocks.");
    return false;
  }

  // compute output size
  size_t stream_size = 0;
  for (int sn = static_cast<int>(xzs.num - 1); sn >= 0; sn--)
  {
    const CXzStream& stream = xzs.streams[sn];
    for (size_t bn = 0; bn < stream.numBlocks; bn++)
    {
      const CXzBlockSizes& block = stream.blocks[bn];
      stream_size += block.unpackSize;
    }
  }

  if (stream_size == 0)
  {
    Error::SetString(error, "Stream is empty.");
    return false;
  }

  ByteBuffer out_buffer(stream_size);
  size_t out_pos = 0;

  CXzUnpacker unpacker = {};
  XzUnpacker_Construct(&unpacker, &g_Alloc);

  // The unpacker allocates decoder state while decoding; release it on every exit path, including the early
  // error returns inside the loop below.
  const ScopedGuard unpacker_guard([&unpacker]() { XzUnpacker_Free(&unpacker); });

  // Streams are walked from the highest index down to zero, matching the size computation above.
  for (int sn = static_cast<int>(xzs.num - 1); sn >= 0; sn--)
  {
    const CXzStream& stream = xzs.streams[sn];

    // Block data begins right after the fixed-size stream header.
    size_t src_offset = stream.startOffset + XZ_STREAM_HEADER_SIZE;
    if (src_offset >= data_size)
      break;

    for (size_t bn = 0; bn < stream.numBlocks; bn++)
    {
      const CXzBlockSizes& block = stream.blocks[bn];

      XzUnpacker_Init(&unpacker);
      unpacker.streamFlags = stream.flags;
      XzUnpacker_PrepareToRandomBlockDecoding(&unpacker);

      // Decode straight into the remaining part of the output buffer, avoiding an intermediate copy.
      XzUnpacker_SetOutBuf(&unpacker, &out_buffer[out_pos], out_buffer.size() - out_pos);

      // The recorded block size is rounded up to a multiple of four (presumably to cover the block padding of
      // the XZ format - confirm against the spec), and never allowed to run past the end of the input.
      const size_t orig_compressed_size =
        std::min<size_t>(Common::AlignUpPow2(block.totalSize, 4), static_cast<size_t>(data_size - src_offset));

      SizeT block_uncompressed_size = block.unpackSize;
      SizeT block_compressed_size = orig_compressed_size;
      ECoderStatus status;
      res = XzUnpacker_Code(&unpacker, nullptr, &block_uncompressed_size, &data[src_offset], &block_compressed_size,
                            true, CODER_FINISH_END, &status);
      if (res != SZ_OK || status != CODER_STATUS_FINISHED_WITH_MARK) [[unlikely]]
      {
        Error::SetStringFmt(error, "XzUnpacker_Code() failed: {} ({}) (status {})", SZErrorToString(res), res,
                            static_cast<unsigned>(status));
        return false;
      }

      // Not fatal on its own: the final out_pos check below catches a short output.
      if (block_compressed_size != orig_compressed_size || block_uncompressed_size != block.unpackSize)
      {
        WARNING_LOG("Decompress size mismatch: {}/{} vs {}/{}", block_compressed_size, block_uncompressed_size,
                    orig_compressed_size, block.unpackSize);
      }

      out_pos += block_uncompressed_size;
      src_offset += block_compressed_size;
    }
  }

  if (out_pos != out_buffer.size())
  {
    Error::SetStringFmt(error, "Only decompressed {} of {} bytes", out_pos, out_buffer.size());
    return false;
  }

  ret = std::move(out_buffer);
  return true;
}
// Picks the compression type from the extension of `path`, compared case-insensitively:
// "zst" selects Zstandard, "xz" selects XZ, and any other extension selects Uncompressed.
// Every path through this function returns a value, and `error` is never written here.
std::optional<CompressHelpers::CompressType> CompressHelpers::GetCompressType(const std::string_view path, Error* error) std::optional<CompressHelpers::CompressType> CompressHelpers::GetCompressType(const std::string_view path, Error* error)
{ {
const std::string_view extension = Path::GetExtension(path); const std::string_view extension = Path::GetExtension(path);
if (StringUtil::EqualNoCase(extension, "zst")) if (StringUtil::EqualNoCase(extension, "zst"))
return CompressType::Zstandard; return CompressType::Zstandard;
else if (StringUtil::EqualNoCase(extension, "xz"))
return CompressType::XZ;
return CompressType::Uncompressed; return CompressType::Uncompressed;
} }
@ -92,7 +374,11 @@ bool CompressHelpers::DecompressHelper(CompressHelpers::OptionalByteBuffer& ret,
return true; return true;
} }
break;
case CompressType::XZ:
{
return XzDecompress(ret, data.data(), data.size(), error);
}
DefaultCaseIsUnreachable() DefaultCaseIsUnreachable()
} }
@ -139,6 +425,11 @@ bool CompressHelpers::CompressHelper(OptionalByteBuffer& ret, CompressType type,
return true; return true;
} }
case CompressType::XZ:
{
return XzCompress(ret, data.data(), data.size(), clevel, error);
}
DefaultCaseIsUnreachable() DefaultCaseIsUnreachable()
} }
} }

View File

@ -15,6 +15,7 @@ enum class CompressType
{ {
Uncompressed, Uncompressed,
Zstandard, Zstandard,
XZ,
Count Count
}; };
@ -44,4 +45,7 @@ bool CompressToFile(const char* path, std::span<const u8> data, int clevel = -1,
Error* error = nullptr); Error* error = nullptr);
bool CompressToFile(CompressType type, const char* path, std::span<const u8> data, int clevel = -1, bool CompressToFile(CompressType type, const char* path, std::span<const u8> data, int clevel = -1,
bool atomic_write = true, Error* error = nullptr); bool atomic_write = true, Error* error = nullptr);
const char* SZErrorToString(int res);
} // namespace CompressHelpers } // namespace CompressHelpers

View File

@ -217,6 +217,9 @@
<ProjectReference Include="..\..\dep\libchdr\libchdr.vcxproj"> <ProjectReference Include="..\..\dep\libchdr\libchdr.vcxproj">
<Project>{425d6c99-d1c8-43c2-b8ac-4d7b1d941017}</Project> <Project>{425d6c99-d1c8-43c2-b8ac-4d7b1d941017}</Project>
</ProjectReference> </ProjectReference>
<ProjectReference Include="..\..\dep\lzma\lzma.vcxproj">
<Project>{dd944834-7899-4c1c-a4c1-064b5009d239}</Project>
</ProjectReference>
<ProjectReference Include="..\..\dep\reshadefx\reshadefx.vcxproj"> <ProjectReference Include="..\..\dep\reshadefx\reshadefx.vcxproj">
<Project>{27b8d4bb-4f01-4432-bc14-9bf6ca458eee}</Project> <Project>{27b8d4bb-4f01-4432-bc14-9bf6ca458eee}</Project>
</ProjectReference> </ProjectReference>
@ -250,7 +253,7 @@
<Import Project="util.props" /> <Import Project="util.props" />
<ItemDefinitionGroup> <ItemDefinitionGroup>
<ClCompile> <ClCompile>
<AdditionalIncludeDirectories>%(AdditionalIncludeDirectories);$(SolutionDir)dep\reshadefx\include</AdditionalIncludeDirectories> <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories);$(SolutionDir)dep\reshadefx\include;$(SolutionDir)dep\lzma\include</AdditionalIncludeDirectories>
<ObjectFileName>$(IntDir)/%(RelativeDir)/</ObjectFileName> <ObjectFileName>$(IntDir)/%(RelativeDir)/</ObjectFileName>
<PrecompiledHeader>Use</PrecompiledHeader> <PrecompiledHeader>Use</PrecompiledHeader>
<PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile> <PrecompiledHeaderFile>pch.h</PrecompiledHeaderFile>