Add .xz support

Add .xz support
This commit is contained in:
Andy Vandijck 2025-07-11 19:55:09 +02:00
parent 996a827d8e
commit 6fedbdc512
11 changed files with 818 additions and 4 deletions

View File

@ -61,6 +61,10 @@ target_sources(vbam-fex
fex/BZ2_Extractor.h
fex/BZ2_Reader.cpp
fex/BZ2_Reader.h
fex/XZ_Extractor.cpp
fex/XZ_Extractor.h
fex/XZ_Reader.cpp
fex/XZ_Reader.h
fex/Rar_Extractor.cpp
fex/Rar_Extractor.h
fex/Zip7_Extractor.cpp
@ -69,6 +73,8 @@ target_sources(vbam-fex
fex/Zip_Extractor.h
fex/Zlib_Inflater.cpp
fex/Zlib_Inflater.h
fex/LZMA_Inflater.cpp
fex/LZMA_Inflater.h
fex/BZ2_Inflater.cpp
fex/BZ2_Inflater.h
unrar/archive.cpp
@ -112,11 +118,22 @@ target_include_directories(vbam-fex
)
find_library(BZ2_LIBRARY bz2)
find_library(LZMA_LIBRARY lzma)
if (BZ2_LIBRARY)
target_compile_definitions(vbam-fex PRIVATE FEX_ENABLE_BZ2=1)
target_link_libraries(vbam-fex
PRIVATE ${BZ2_LIBRARY}
)
endif()
# Optional .xz/.lzma support: only when liblzma was found, define
# FEX_ENABLE_LZMA (the macro that guards the XZ_*/LZMA_* sources) and link it.
if (LZMA_LIBRARY)
target_compile_definitions(vbam-fex PRIVATE FEX_ENABLE_LZMA=1)
target_link_libraries(vbam-fex
PRIVATE ${LZMA_LIBRARY}
)
endif()
target_link_libraries(vbam-fex
PRIVATE ${ZLIB_LIBRARY} ${BZ2_LIBRARY}
PRIVATE ${ZLIB_LIBRARY}
)

View File

@ -21,7 +21,7 @@ int const block_size = 4096;
static const char* get_bz2_err( int code )
{
assert( code != Z_OK );
assert( code != BZ_OK );
switch ( code )
{
case BZ_MEM_ERROR: return blargg_err_memory;

View File

@ -213,7 +213,7 @@ struct fex_type_t_ {
blargg_err_t (*init)(); // Called by fex_init(). Can be NULL.
};
extern const fex_type_t_ fex_7z_type[1], fex_gz_type[1], fex_bz2_type[1], fex_rar_type[1], fex_zip_type[1],
extern const fex_type_t_ fex_7z_type[1], fex_gz_type[1], fex_bz2_type[1], fex_xz_type[1], fex_rar_type[1], fex_zip_type[1],
fex_bin_type[1];
inline blargg_err_t File_Extractor::open_v()

View File

@ -0,0 +1,367 @@
// File_Extractor 1.0.0. http://www.slack.net/~ant/
#if FEX_ENABLE_LZMA
#include <stdio.h>
#include "LZMA_Inflater.h"
/* Copyright (C) 2025 Andy Vandijck. This module is free software; you
can redistribute it and/or modify it under the terms of the GNU Lesser
General Public License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version. This
module is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
details. You should have received a copy of the GNU Lesser General Public
License along with this module; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
#include "blargg_source.h"
int const block_size = 4096;
// Translates a liblzma status code (anything but LZMA_OK) into a blargg error
// string. Codes without a dedicated mapping yield a generic message.
static const char* get_lzma_err( int code )
{
    assert( code != LZMA_OK );

    if ( code == LZMA_MEM_ERROR )
        return blargg_err_memory;

    if ( code == LZMA_DATA_ERROR )
        return blargg_err_file_corrupt;

    // TODO: handle more error codes
    return BLARGG_ERR( BLARGG_ERR_GENERIC, "problem uncompressing LZMA data" );
}
void LZMA_Inflater::end()
{
if ( deflated_ )
{
deflated_ = false;
lzma_end(&zbuf);
}
buf.clear();
static lzma_stream const empty = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, (lzma_reserved_enum)0, (lzma_reserved_enum)0 };
memcpy( &zbuf, &empty, sizeof zbuf );
}
// Constructs an idle inflater. Every scalar member gets a defined value so
// that nothing is read uninitialized before begin()/set_mode() are called.
LZMA_Inflater::LZMA_Inflater() :
    action( LZMA_RUN ),
    deflated_( false ),
    callback( NULL ),
    user_data( NULL ),
    mode_( mode_copy ),
    buf_ptr( NULL )
{
    end(); // initialize things (zeroes the stream structure)
}
// Releases the decoder and the input buffer.
LZMA_Inflater::~LZMA_Inflater()
{
    this->end();
}
// Asks the callback for up to `count` bytes, storing them in the last `count`
// bytes of the buffer, then points the stream's input at whatever arrived.
// The callback updates `count` to the number of bytes actually delivered.
blargg_err_t LZMA_Inflater::fill_buf( int count )
{
    byte* const dest = buf.end() - count;
    RETURN_ERR( callback( user_data, dest, &count ) );

    zbuf.next_in  = (uint8_t *)dest;
    zbuf.avail_in = count;
    return blargg_ok;
}
// Installs the data source callback, allocates the input buffer and performs
// the first fill. If the requested buffer size is zero or cannot be allocated,
// the default size (4 * block_size) is used and initial_read is ignored.
blargg_err_t LZMA_Inflater::begin( callback_t new_callback, void* new_user_data,
        int new_buf_size, int initial_read )
{
    callback  = new_callback;
    user_data = new_user_data;

    end();

    // TODO: decide whether using different size on alloc failure is a good idea
    //RETURN_ERR( buf.resize( new_buf_size ? new_buf_size : 4 * block_size ) );
    bool use_default_size = ( new_buf_size == 0 );
    if ( !use_default_size && buf.resize( new_buf_size ) )
    {
        ACK_FAILURE();
        use_default_size = true;
    }

    if ( use_default_size )
    {
        RETURN_ERR( buf.resize( 4 * block_size ) );
        initial_read = 0;
    }

    // Fill buffer with some data, less than normal buffer size since caller might
    // just be examining beginning of file.
    int const first_fill = initial_read ? initial_read : block_size;
    return fill_buf( first_fill );
}
/// Return true if the buffered input starts with the 6-byte .xz magic.
bool LZMA_Inflater::is_format_xz(void)
{
    // Specify the magic as hex to be compatible with EBCDIC systems.
    static const uint8_t magic[6] = { 0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00 };

    if (zbuf.avail_in < sizeof(magic))
        return false;

    return memcmp(zbuf.next_in, magic, sizeof(magic)) == 0;
}
/// Return true if the data in in_buf seems to be in the .lzma format.
bool LZMA_Inflater::is_format_lzma(void)
{
// The .lzma header is 13 bytes.
if (zbuf.avail_in < 13)
return false;
// Decode the LZMA1 properties.
lzma_filter filter = { .id = LZMA_FILTER_LZMA1 };
if (lzma_properties_decode(&filter, NULL, zbuf.next_in, 5) != LZMA_OK)
return false;
// A hack to ditch tons of false positives: We allow only dictionary
// sizes that are 2^n or 2^n + 2^(n-1) or UINT32_MAX. LZMA_Alone
// created only files with 2^n, but accepts any dictionary size.
// If someone complains, this will be reconsidered.
lzma_options_lzma *opt = (lzma_options_lzma *)filter.options;
const uint32_t dict_size = opt->dict_size;
free(opt);
if (dict_size != UINT32_MAX) {
uint32_t d = dict_size - 1;
d |= d >> 2;
d |= d >> 3;
d |= d >> 4;
d |= d >> 8;
d |= d >> 16;
++d;
if (d != dict_size || dict_size == 0)
return false;
}
// Another hack to ditch false positives: Assume that if the
// uncompressed size is known, it must be less than 256 GiB.
// Again, if someone complains, this will be reconsidered.
uint64_t uncompressed_size = 0;
for (size_t i = 0; i < 8; ++i)
uncompressed_size |= (uint64_t)(zbuf.next_in[5 + i]) << (i * 8);
if (uncompressed_size != UINT64_MAX
&& uncompressed_size > (UINT64_C(1) << 38))
return false;
return true;
}
/// Return true if the buffered input starts with the 4-byte .lz (lzip) magic.
bool LZMA_Inflater::is_format_lzip(void)
{
    // "LZIP", spelled as hex so the comparison does not depend on the
    // execution character set.
    static const uint8_t magic[4] = { 0x4C, 0x5A, 0x49, 0x50 };

    if (zbuf.avail_in < sizeof(magic))
        return false;

    return memcmp(zbuf.next_in, magic, sizeof(magic)) == 0;
}
// Selects how read() treats the source data.
//
// mode        - mode_copy passes data through unchanged; mode_raw_deflate uses
//               the legacy .lzma (LZMA_Alone) decoder; mode_unxz uses the .xz
//               stream decoder; mode_unlz uses liblzma's auto-detecting
//               decoder; mode_auto picks one of these from the bytes that
//               begin() buffered.
// data_offset - number of buffered bytes to skip before the data begins.
//
// Returns blargg_ok, or an error string if the decoder cannot be created.
blargg_err_t LZMA_Inflater::set_mode( mode_t mode, int data_offset )
{
    zbuf.next_in  += data_offset;
    zbuf.avail_in -= data_offset;
    buf_ptr = zbuf.next_in;

    if ( mode == mode_auto )
    {
        // examine buffered data for a known container signature; later
        // matches override earlier ones
        mode = mode_copy;
        if ( is_format_lzip() ) {
            fprintf(stderr, "LZIP detected\n");
            mode = mode_unlz;
        }
        if ( is_format_xz() ) {
            fprintf(stderr, "XZ detected\n");
            mode = mode_unxz;
        }
        if ( is_format_lzma() ) {
            fprintf(stderr, "LZMA detected\n");
            mode = mode_raw_deflate;
        }
    }

    if ( mode != mode_copy )
    {
        // Release a decoder left over from an earlier set_mode() call so that
        // re-initializing the stream below does not leak it.
        if ( deflated_ )
        {
            deflated_ = false;
            lzma_end( &zbuf );
        }

        // NOTE(review): re-initializing the stream clears next_in/avail_in, so
        // the bytes buffered by begin() are not handed to the decoder. read()
        // refills the buffer through the callback instead, which means the
        // caller must have the source positioned at the start of the
        // compressed data before the first read().
        zbuf = LZMA_STREAM_INIT;

        lzma_ret err;
        if ( mode == mode_raw_deflate )
            err = lzma_alone_decoder( &zbuf, UINT64_MAX );
        else if ( mode == mode_unlz )
            // The .xz stream decoder rejects .lz input. The auto decoder
            // recognizes .lz in liblzma builds that include lzip support and
            // otherwise fails at decode time just as the stream decoder would.
            err = lzma_auto_decoder( &zbuf, UINT64_MAX, LZMA_CONCATENATED );
        else
            err = lzma_stream_decoder( &zbuf, UINT64_MAX, LZMA_CONCATENATED );

        if ( err != LZMA_OK ) {
            fprintf(stderr, "Couldn't initialize LZMA stream decoder\n");
            // Report the real cause (typically out of memory) rather than
            // always claiming the file is corrupt.
            return get_lzma_err( err );
        }
        deflated_ = true;
    }

    mode_ = mode;
    return blargg_ok;
}
/*
// Reads/inflates entire stream. All input must be in buffer, and count must be total
// of all output.
blargg_err_t read_all( void* out, int count );
// zlib automatically applies this optimization (uses inflateFast)
// TODO: remove
blargg_err_t LZMA_Inflater::read_all( void* out, int count )
{
int err = LZMA_OK;
if ( deflated_ )
{
zbuf.next_out = (char*) out;
zbuf.avail_out = count;
if ((buf.size() - zbuf.total_out) <= block_size)
action = LZMA_FINISH;
else
action = LZMA_RUN;
err = lzma_code(&zbuf, action);
if ( zbuf.avail_out || err != Z_STREAM_END )
return blargg_err_file_corrupt;
}
else
{
if ( zbuf.avail_in < count )
return blargg_err_file_corrupt;
memcpy( out, zbuf.next_in, count );
zbuf.next_in += count;
zbuf.avail_in -= count;
}
return blargg_ok;
}
*/
// Reads/decompresses at most *count_io bytes into *out and sets *count_io to
// the number of bytes actually produced (less than requested only if the end
// of the data was reached). May be called repeatedly; the stream position is
// carried over between calls.
//
// Returns blargg_ok, or an error string if the callback fails, liblzma
// reports an error, or the source ends before the compressed stream does.
blargg_err_t LZMA_Inflater::read( void* out, int* count_io )
{
    int remain = *count_io;

    // A decoder set up by set_mode() starts without any input attached, so an
    // active decoder is a reason to proceed even if next_in is still NULL.
    // After end(), both deflated_ and next_in are cleared: nothing more to read.
    if ( remain && (deflated_ || zbuf.next_in) )
    {
        if ( deflated_ )
        {
            // No input consumed yet means the decoder is fresh: start in
            // LZMA_RUN. Otherwise keep the action from the previous call,
            // since liblzma does not allow going back from LZMA_FINISH.
            if ( zbuf.total_in == 0 )
                action = LZMA_RUN;

            zbuf.next_out  = (uint8_t*) out;
            zbuf.avail_out = remain;

            while ( 1 )
            {
                unsigned int old_avail_in = (unsigned int)zbuf.avail_in;
                lzma_ret const err = lzma_code( &zbuf, action );

                if ( err == LZMA_STREAM_END )
                {
                    remain = (int)zbuf.avail_out;
                    end();
                    break; // no more data to inflate
                }

                // LZMA_BUF_ERROR with no input merely means "feed me more".
                if ( err != LZMA_OK && (err != LZMA_BUF_ERROR || old_avail_in) ) {
                    fprintf(stderr, "LZMA error: %d, old available in: %u\n", (int)err, old_avail_in);
                    return get_lzma_err( err );
                }

                if ( !zbuf.avail_out )
                {
                    remain = 0;
                    break; // requested number of bytes inflated
                }

                if ( zbuf.avail_in )
                {
                    fprintf(stderr, "Available in: %d, file corrupt\n", (int)zbuf.avail_in);
                    // decoder should never leave input if there's still space for output
                    check( false );
                    return blargg_err_file_corrupt;
                }

                if ( action == LZMA_FINISH ) {
                    // The decoder was already told that the input is complete
                    // and it still has not reached the end of the stream.
                    fprintf(stderr, "No more available input data\n");
                    return blargg_err_file_corrupt; // stream didn't end but there's no more data
                }

                RETURN_ERR( fill_buf( (int)buf.size() ) );

                // Source exhausted: tell the decoder so that it can report
                // LZMA_STREAM_END (required with LZMA_CONCATENATED) or fail on
                // a truncated stream.
                if ( !zbuf.avail_in )
                    action = LZMA_FINISH;
            }
        }
        else
        {
            while ( 1 )
            {
                // copy buffered data
                if ( zbuf.avail_in )
                {
                    long count = zbuf.avail_in;
                    if ( count > remain )
                        count = remain;
                    memcpy( out, zbuf.next_in, count );
                    zbuf.total_out += count;
                    out = (char*) out + count;
                    remain        -= count;
                    zbuf.next_in  += count;
                    zbuf.avail_in -= count;
                }

                // buffer drained without having been filled to its end:
                // the previous fill hit end of file
                if ( !zbuf.avail_in && zbuf.next_in < (uint8_t *)buf.end() )
                {
                    end();
                    break;
                }

                // read large request directly
                if ( remain + zbuf.total_out % block_size >= buf.size() )
                {
                    int count = remain;
                    RETURN_ERR( callback( user_data, out, &count ) );
                    zbuf.total_out += count;
                    out = (char*) out + count;
                    remain -= count;

                    if ( remain )
                    {
                        end();
                        break;
                    }
                }

                if ( !remain )
                    break;

                RETURN_ERR( fill_buf( (int)(buf.size() - zbuf.total_out % block_size) ) );
            }
        }
    }

    *count_io -= remain;
    return blargg_ok;
}
#endif

View File

@ -0,0 +1,94 @@
// Simplifies use of liblzma for decompressing .xz/.lzma data
// File_Extractor 1.0.0
#ifndef LZMA_INFLATER_H
#define LZMA_INFLATER_H
#include <lzma.h>
#include "Data_Reader.h"
#include "blargg_common.h"
// Buffered reader that pulls bytes from a caller-supplied callback and either
// passes them through unchanged or decompresses them with liblzma.
class LZMA_Inflater
{
public:
// Data source callback. Reads at most min(*count,bytes_until_eof()) bytes
// into *out and sets *count to that number, or returns error if that many
// can't be read.
typedef blargg_err_t (*callback_t)(void *user_data, void *out, int *count);
// Begins by setting callback and filling buffer. Default buffer is 16K and
// filled to 4K, or specify buf_size and initial_read for custom buffer size
// and how much to read initially.
blargg_err_t begin(callback_t, void *user_data, int buf_size = 0, int initial_read = 0);
// Data read into buffer by begin()
const unsigned char *data() const
{
return (const unsigned char *)zbuf.next_in;
}
// Number of unread bytes currently in the buffer (the stream's avail_in,
// converted to int)
int filled() const
{
return zbuf.avail_in;
}
// The stream's total_out counter, converted to int
int totalOut() const
{
return zbuf.total_out;
}
// Begins decompression using specified mode. mode_copy passes data through
// unchanged, mode_raw_deflate selects the legacy .lzma (LZMA_Alone) decoder,
// and mode_unxz/mode_unlz select a container decoder. Using mode_auto picks
// among these by examining the header bytes already in the buffer. Use
// buf_offset to specify where data begins in buffer, in case there is
// header data that should be skipped.
enum mode_t { mode_copy, mode_unxz, mode_unlz, mode_raw_deflate, mode_auto };
blargg_err_t set_mode(mode_t, int buf_offset = 0);
// True if set_mode() has set up a decoder, i.e. the resulting mode is
// anything other than mode_copy
bool deflated() const
{
return deflated_;
}
// Reads/decompresses at most *count_io bytes into *out and sets *count_io to
// actual number of bytes read (less than requested if end of data was reached).
// Buffers source data internally, even in copy mode, so input file can be
// unbuffered without sacrificing performance.
blargg_err_t read(void *out, int *count_io);
// The stream's total_out counter, converted to int.
// NOTE(review): set_mode() re-initializes the stream when it sets up a
// decoder, so in that case this presumably counts from set_mode() rather
// than from begin() - confirm.
int tell() const
{
return zbuf.total_out;
}
// Ends decompression and frees memory
void end();
private:
// noncopyable
LZMA_Inflater(const LZMA_Inflater &);
LZMA_Inflater &operator=(const LZMA_Inflater &);
// Implementation
public:
LZMA_Inflater();
~LZMA_Inflater();
// Format probes used by mode_auto; each inspects the bytes currently
// buffered (zbuf.next_in / zbuf.avail_in) without consuming them.
bool is_format_xz(void);
bool is_format_lzma(void);
bool is_format_lzip(void);
private:
// liblzma stream state; next_in/avail_in also track the buffer in copy mode
lzma_stream zbuf;
// Action passed to lzma_code() by read()
lzma_action action;
// Input buffer filled through the callback
blargg_vector<unsigned char> buf;
// True while a liblzma decoder is active and must be released with lzma_end()
bool deflated_;
callback_t callback;
void *user_data;
// Mode recorded by the last set_mode() call
int mode_;
// Start of the data within buf, as recorded by set_mode()
const uint8_t *buf_ptr;
// Refills the last `count` bytes of buf through the callback
blargg_err_t fill_buf(int count);
};
#endif

View File

@ -0,0 +1,103 @@
// File_Extractor 1.0.0. http://www.slack.net/~ant/
#if FEX_ENABLE_LZMA
#include "XZ_Extractor.h"
#include <zlib.h>
/* Copyright (C) 2005-2009 Shay Green. This module is free software; you
can redistribute it and/or modify it under the terms of the GNU Lesser
General Public License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version. This
module is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
details. You should have received a copy of the GNU Lesser General Public
License along with this module; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
#include "blargg_source.h"
// TODO: could close file once data has been read into memory
// One-time initializer registered in fex_xz_type; always succeeds.
static blargg_err_t init_xz_file()
{
    (void) get_crc_table(); // initialize zlib's CRC-32 tables
    return blargg_ok;
}
// Factory registered in fex_xz_type: creates a new XZ_Extractor.
static File_Extractor* new_xz()
{
    File_Extractor* const created = BLARGG_NEW XZ_Extractor;
    return created;
}
// Type descriptor for .xz files: extension, factory function, description
// string, and initializer.
fex_type_t_ const fex_xz_type [1] = {{
".xz",
&new_xz,
"xz file",
&init_xz_file
}};
// Constructs an extractor bound to the .xz type descriptor.
XZ_Extractor::XZ_Extractor()
    : File_Extractor( fex_xz_type )
{
}
// Closes the archive on destruction.
XZ_Extractor::~XZ_Extractor()
{
    this->close();
}
// Opening by path does not open the file here; it only performs the
// open_v() setup.
blargg_err_t XZ_Extractor::open_path_v()
{
    // skip opening file
    blargg_err_t const result = open_v();
    return result;
}
// Opens the archive file, (re)opens the reader unless it is already open and
// positioned at the start, then publishes the size and CRC it reports.
blargg_err_t XZ_Extractor::stat_v()
{
    RETURN_ERR( open_arc_file( true ) );

    bool const ready_at_start = gr.opened() && gr.tell() == 0;
    if ( !ready_at_start )
        RETURN_ERR( gr.open( &arc() ) );

    set_info( gr.remain(), 0, gr.crc32() );
    return blargg_ok;
}
// Derives the name of the single entry from the archive path by dropping a
// trailing ".xz", if present.
blargg_err_t XZ_Extractor::open_v()
{
    const char* const path = arc_path();

    // Remove .xz suffix
    size_t name_len = strlen( path );
    if ( fex_has_extension( path, ".xz" ) )
        name_len -= 3;

    // One extra byte for the terminator; the copy may include the first
    // character of the suffix, which the terminator then overwrites.
    RETURN_ERR( name.resize( name_len + 1 ) );
    memcpy( name.begin(), path, name.size() );
    name [name.size() - 1] = '\0';

    set_name( name.begin() );
    return blargg_ok;
}
// Drops the stored entry name and closes the reader. The two steps do not
// depend on each other.
void XZ_Extractor::close_v()
{
    gr.close();
    name.clear();
}
// Does nothing and reports success.
blargg_err_t XZ_Extractor::next_v()
{
    blargg_err_t const no_error = blargg_ok;
    return no_error;
}
// Rewinding re-announces the entry name stored by open_v().
blargg_err_t XZ_Extractor::rewind_v()
{
    const char* const entry_name = name.begin();
    set_name( entry_name );
    return blargg_ok;
}
// Reads n bytes of entry data into p through the reader.
blargg_err_t XZ_Extractor::extract_v( void* p, int n )
{
    blargg_err_t const result = gr.read( p, n );
    return result;
}
#endif

View File

@ -0,0 +1,35 @@
// Presents an .xz-compressed file as an "archive" of just that file.
// Also handles files that are not compressed.
// File_Extractor 1.0.0
#ifndef XZ_EXTRACTOR_H
#define XZ_EXTRACTOR_H
#include "File_Extractor.h"
#include "XZ_Reader.h"
// File_Extractor implementation backed by an XZ_Reader; it exposes a single
// entry whose name is the archive path without its ".xz" suffix.
class XZ_Extractor : public File_Extractor
{
public:
XZ_Extractor();
virtual ~XZ_Extractor();
protected:
// File_Extractor hooks
virtual blargg_err_t open_path_v();
virtual blargg_err_t open_v();
virtual void close_v();
virtual blargg_err_t next_v();
virtual blargg_err_t rewind_v();
virtual blargg_err_t stat_v();
virtual blargg_err_t extract_v(void *, int);
private:
// Reader that supplies the entry's data
XZ_Reader gr;
// Entry name (NUL-terminated), set by open_v()
blargg_vector<char> name;
// NOTE(review): no definition of this appears in XZ_Extractor.cpp - confirm
// whether it is still needed.
void set_info_();
};
#endif

View File

@ -0,0 +1,129 @@
// File_Extractor 1.0.0. http://www.slack.net/~ant/
#if FEX_ENABLE_LZMA
#include <stdio.h>
#include "XZ_Reader.h"
#include "blargg_endian.h"
/* Copyright (C) 2025 Andy Vandijck. This module is free software; you
can redistribute it and/or modify it under the terms of the GNU Lesser
General Public License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version. This
module is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
details. You should have received a copy of the GNU Lesser General Public
License along with this module; if not, write to the Free Software Foundation,
Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */
#include "blargg_source.h"
// Constructs a closed reader. size_ and crc32_ are given defined values
// because tell() and crc32() read them even before open() is called.
XZ_Reader::XZ_Reader() :
    in( NULL ),
    crc32_( 0 ),
    size_( 0 )
{
    close();
}
// Nothing to do here; the inflater member cleans up in its own destructor.
XZ_Reader::~XZ_Reader()
{
}
// Inflater callback: forwards the request to the File_Reader passed as
// user data.
static blargg_err_t XZ_reader_read( void* file, void* out, int* count )
{
    File_Reader* const reader = STATIC_CAST(File_Reader*,file);
    return reader->read_avail( out, count );
}
size_t XZ_Reader::get_uncompressed_size()
{
lzma_stream_flags stream_flags;
const uint8_t *footer_ptr = NULL;
const uint8_t *index_ptr = NULL;
const uint8_t *data = (const uint8_t *)malloc(in->size());
if (data == NULL) {
fprintf(stderr, "Error: Couldn't allocate data\n");
return 0;
}
in->seek(0);
in->read((void *)data, in->size());
// 12 is the size of the footer per the file-spec...
footer_ptr = data + (in->size() - 12);
// Decode the footer, so we have the backward_size pointing to the index
(void)lzma_stream_footer_decode(&stream_flags, (const uint8_t *)footer_ptr);
// This is the index pointer, where the size is ultimately stored...
index_ptr = data + ((in->size() - 12) - stream_flags.backward_size);
// Allocate an index
lzma_index *index = lzma_index_init(NULL);
uint64_t memlimit;
size_t in_pos = 0;
// decode the index we calculated
lzma_index_buffer_decode(&index, &memlimit, NULL, (const uint8_t *)index_ptr, &in_pos, footer_ptr - index_ptr);
// Just make sure the whole index was decoded, otherwise, we might be
// dealing with something utterly corrupt
if (in_pos != stream_flags.backward_size) {
lzma_index_end(index, NULL);
free((void *)data);
fprintf(stderr, "Error: input position %u is not equal to backward size %llu\n", in_pos, stream_flags.backward_size);
return 0;
}
// Finally get the size
lzma_vli uSize = lzma_index_uncompressed_size(index);
lzma_index_end(index, NULL);
free((void *)data);
in->seek(0);
return (size_t) uSize;
}
// Determines the number of bytes that reading will yield and stores it in
// size_. For compressed input the size comes from the .xz Index; for input
// that is passed through unchanged it is simply the size of the file (parsing
// an .xz footer there would fail and report 0 bytes). No CRC is available,
// so crc32_ is set to 0.
// NOTE(review): legacy .lzma and .lz input have no .xz Index, so their size is
// still reported as 0 here.
blargg_err_t XZ_Reader::calc_size()
{
    if ( inflater.deflated() )
        size_ = (int)get_uncompressed_size();
    else
        size_ = (int)in->size();

    fprintf(stderr, "XZ uncompressed size: %d\n", size_);
    crc32_ = 0;
    return blargg_ok;
}
// Attaches to new_in (kept until close()), rewinds it, primes the inflater
// with format auto-detection, and sets the number of readable bytes.
blargg_err_t XZ_Reader::open( File_Reader* new_in )
{
    close();
    in = new_in;

    RETURN_ERR( new_in->seek( 0 ) );

    // Buffer the beginning of the file and let the inflater pick a mode
    RETURN_ERR( inflater.begin( XZ_reader_read, new_in ) );
    RETURN_ERR( inflater.set_mode( inflater.mode_auto ) );

    RETURN_ERR( calc_size() );
    int const total_bytes = size_;
    set_remain( total_bytes );

    return blargg_ok;
}
// Detaches from the input file and frees the inflater's memory. The two
// steps do not depend on each other.
void XZ_Reader::close()
{
    inflater.end();
    in = NULL;
}
// Reads exactly `count` bytes into out through the inflater; a short read is
// reported as a corrupt file.
blargg_err_t XZ_Reader::read_v( void* out, int count )
{
    assert( in );

    int got = count;
    RETURN_ERR( inflater.read( out, &got ) );
    fprintf(stderr, "XZ: Actual read: %d, count: %d\n", got, count);

    if ( got == count )
        return blargg_ok;

    return blargg_err_file_corrupt;
}
#endif

View File

@ -0,0 +1,60 @@
// Transparently decompresses .xz files, as well as reading uncompressed files
// File_Extractor 1.0.0
#ifndef XZ_READER_H
#define XZ_READER_H
#include "Data_Reader.h"
#include "LZMA_Inflater.h"
// Data_Reader that reads from a File_Reader through an LZMA_Inflater.
class XZ_Reader : public Data_Reader
{
public:
// Opens the given reader, auto-detecting its format. Keeps pointer to reader
// until close().
blargg_err_t open(File_Reader *);
// True if file is open
bool opened() const
{
return in != NULL;
}
// Frees memory
void close();
// True if file is compressed
bool deflated() const
{
return inflater.deflated();
}
// CRC-32 of data, or 0 if unavailable
unsigned int crc32() const
{
return crc32_;
}
// Number of bytes read since opening
int tell() const
{
return size_ - remain();
}
public:
XZ_Reader();
virtual ~XZ_Reader();
// Uncompressed size recorded in the .xz Index of the input file, or 0 if it
// cannot be determined
size_t get_uncompressed_size();
protected:
virtual blargg_err_t read_v(void *, int);
private:
// Input file; NULL while closed
File_Reader *in;
// Set to 0 by calc_size()
unsigned crc32_;
// Total number of readable bytes, set by calc_size()
int size_;
LZMA_Inflater inflater;
// Fills in size_ and crc32_
blargg_err_t calc_size();
};
#endif

View File

@ -27,7 +27,13 @@
#define FEX_TYPE_BZ2
#endif
#define FEX_TYPE_LIST fex_7z_type, fex_gz_type, fex_zip_type, fex_rar_type, FEX_TYPE_BZ2
// FEX_TYPE_LZMA expands to the .xz type entry followed by a comma when LZMA
// support is enabled, and to nothing otherwise, so that it can be appended
// to FEX_TYPE_LIST.
#if FEX_ENABLE_LZMA
#define FEX_TYPE_LZMA fex_xz_type,
#else
#define FEX_TYPE_LZMA
#endif
#define FEX_TYPE_LIST fex_7z_type, fex_gz_type, fex_zip_type, fex_rar_type, FEX_TYPE_BZ2 FEX_TYPE_LZMA
// Use standard config.h if present
#ifdef HAVE_CONFIG_H

View File

@ -33,6 +33,9 @@ BLARGG_EXPORT const fex_type_t* fex_type_list( void )
// Modify blargg_config.h to change type list, NOT this file
fex_7z_type,
fex_gz_type,
#if FEX_ENABLE_LZMA
fex_xz_type,
#endif
#if FEX_ENABLE_BZ2
fex_bz2_type,
#endif