mirror of https://github.com/PCSX2/pcsx2.git
Adding zlib example files
This commit is contained in:
parent
a86f2615be
commit
7d491cb230
|
@ -0,0 +1,49 @@
|
|||
This directory contains examples of the use of zlib and other relevant
|
||||
programs and documentation.
|
||||
|
||||
enough.c
|
||||
calculation and justification of ENOUGH parameter in inftrees.h
|
||||
- calculates the maximum table space used in inflate tree
|
||||
construction over all possible Huffman codes
|
||||
|
||||
fitblk.c
|
||||
compress just enough input to nearly fill a requested output size
|
||||
- zlib isn't designed to do this, but fitblk does it anyway
|
||||
|
||||
gun.c
|
||||
uncompress a gzip file
|
||||
- illustrates the use of inflateBack() for high speed file-to-file
|
||||
decompression using call-back functions
|
||||
- is approximately twice as fast as gzip -d
|
||||
- also provides Unix uncompress functionality, again twice as fast
|
||||
|
||||
gzappend.c
|
||||
append to a gzip file
|
||||
- illustrates the use of the Z_BLOCK flush parameter for inflate()
|
||||
- illustrates the use of deflatePrime() to start at any bit
|
||||
|
||||
gzjoin.c
|
||||
join gzip files without recalculating the crc or recompressing
|
||||
- illustrates the use of the Z_BLOCK flush parameter for inflate()
|
||||
- illustrates the use of crc32_combine()
|
||||
|
||||
gzlog.c
|
||||
gzlog.h
|
||||
efficiently and robustly maintain a message log file in gzip format
|
||||
- illustrates use of raw deflate, Z_PARTIAL_FLUSH, deflatePrime(),
|
||||
and deflateSetDictionary()
|
||||
- illustrates use of a gzip header extra field
|
||||
|
||||
zlib_how.html
|
||||
painfully comprehensive description of zpipe.c (see below)
|
||||
- describes in excruciating detail the use of deflate() and inflate()
|
||||
|
||||
zpipe.c
|
||||
reads and writes zlib streams from stdin to stdout
|
||||
- illustrates the proper use of deflate() and inflate()
|
||||
- deeply commented in zlib_how.html (see above)
|
||||
|
||||
zran.c
|
||||
index a zlib or gzip stream and randomly access it
|
||||
- illustrates the use of Z_BLOCK, inflatePrime(), and
|
||||
inflateSetDictionary() to provide random access
|
|
@ -0,0 +1,572 @@
|
|||
/* enough.c -- determine the maximum size of inflate's Huffman code tables over
|
||||
* all possible valid and complete Huffman codes, subject to a length limit.
|
||||
* Copyright (C) 2007, 2008, 2012 Mark Adler
|
||||
* Version 1.4 18 August 2012 Mark Adler
|
||||
*/
|
||||
|
||||
/* Version history:
|
||||
1.0 3 Jan 2007 First version (derived from codecount.c version 1.4)
|
||||
1.1 4 Jan 2007 Use faster incremental table usage computation
|
||||
Prune examine() search on previously visited states
|
||||
1.2 5 Jan 2007 Comments clean up
|
||||
As inflate does, decrease root for short codes
|
||||
Refuse cases where inflate would increase root
|
||||
1.3 17 Feb 2008 Add argument for initial root table size
|
||||
Fix bug for initial root table size == max - 1
|
||||
Use a macro to compute the history index
|
||||
1.4 18 Aug 2012 Avoid shifts more than bits in type (caused endless loop!)
|
||||
Clean up comparisons of different types
|
||||
Clean up code indentation
|
||||
*/
|
||||
|
||||
/*
|
||||
Examine all possible Huffman codes for a given number of symbols and a
|
||||
maximum code length in bits to determine the maximum table size for zlib's
|
||||
inflate. Only complete Huffman codes are counted.
|
||||
|
||||
Two codes are considered distinct if the vectors of the number of codes per
|
||||
length are not identical. So permutations of the symbol assignments result
|
||||
in the same code for the counting, as do permutations of the assignments of
|
||||
the bit values to the codes (i.e. only canonical codes are counted).
|
||||
|
||||
We build a code from shorter to longer lengths, determining how many symbols
|
||||
are coded at each length. At each step, we have how many symbols remain to
|
||||
be coded, what the last code length used was, and how many bit patterns of
|
||||
that length remain unused. Then we add one to the code length and double the
|
||||
number of unused patterns to graduate to the next code length. We then
|
||||
assign all portions of the remaining symbols to that code length that
|
||||
preserve the properties of a correct and eventually complete code. Those
|
||||
properties are: we cannot use more bit patterns than are available; and when
|
||||
all the symbols are used, there are exactly zero possible bit patterns
|
||||
remaining.
|
||||
|
||||
The inflate Huffman decoding algorithm uses two-level lookup tables for
|
||||
speed. There is a single first-level table to decode codes up to root bits
|
||||
in length (root == 9 in the current inflate implementation). The table
|
||||
has 1 << root entries and is indexed by the next root bits of input. Codes
|
||||
shorter than root bits have replicated table entries, so that the correct
|
||||
entry is pointed to regardless of the bits that follow the short code. If
|
||||
the code is longer than root bits, then the table entry points to a second-
|
||||
level table. The size of that table is determined by the longest code with
|
||||
that root-bit prefix. If that longest code has length len, then the table
|
||||
has size 1 << (len - root), to index the remaining bits in that set of
|
||||
codes. Each subsequent root-bit prefix then has its own sub-table. The
|
||||
total number of table entries required by the code is calculated
|
||||
incrementally as the number of codes at each bit length is populated. When
|
||||
all of the codes are shorter than root bits, then root is reduced to the
|
||||
longest code length, resulting in a single, smaller, one-level table.
|
||||
|
||||
The inflate algorithm also provides for small values of root (relative to
|
||||
the log2 of the number of symbols), where the shortest code has more bits
|
||||
than root. In that case, root is increased to the length of the shortest
|
||||
code. This program, by design, does not handle that case, so it is verified
|
||||
that the number of symbols is less than 2^(root + 1).
|
||||
|
||||
In order to speed up the examination (by about ten orders of magnitude for
|
||||
the default arguments), the intermediate states in the build-up of a code
|
||||
are remembered and previously visited branches are pruned. The memory
|
||||
required for this will increase rapidly with the total number of symbols and
|
||||
the maximum code length in bits. However this is a very small price to pay
|
||||
for the vast speedup.
|
||||
|
||||
First, all of the possible Huffman codes are counted, and reachable
|
||||
intermediate states are noted by a non-zero count in a saved-results array.
|
||||
Second, the intermediate states that lead to (root + 1) bit or longer codes
|
||||
are used to look at all sub-codes from those junctures for their inflate
|
||||
memory usage. (The amount of memory used is not affected by the number of
|
||||
codes of root bits or less in length.) Third, the visited states in the
|
||||
construction of those sub-codes and the associated calculation of the table
|
||||
size is recalled in order to avoid recalculating from the same juncture.
|
||||
Beginning the code examination at (root + 1) bit codes, which is enabled by
|
||||
identifying the reachable nodes, accounts for about six of the orders of
|
||||
magnitude of improvement for the default arguments. About another four
|
||||
orders of magnitude come from not revisiting previous states. Out of
|
||||
approximately 2x10^16 possible Huffman codes, only about 2x10^6 sub-codes
|
||||
need to be examined to cover all of the possible table memory usage cases
|
||||
for the default arguments of 286 symbols limited to 15-bit codes.
|
||||
|
||||
Note that an unsigned long long type is used for counting. It is quite easy
|
||||
to exceed the capacity of an eight-byte integer with a large number of
|
||||
symbols and a large maximum code length, so multiple-precision arithmetic
|
||||
would need to replace the unsigned long long arithmetic in that case. This
|
||||
program will abort if an overflow occurs. The big_t type identifies where
|
||||
the counting takes place.
|
||||
|
||||
An unsigned long long type is also used for calculating the number of
|
||||
possible codes remaining at the maximum length. This limits the maximum
|
||||
code length to the number of bits in a long long minus the number of bits
|
||||
needed to represent the symbols in a flat code. The code_t type identifies
|
||||
where the bit pattern counting takes place.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
|
||||
#define local static
|
||||
|
||||
/* Special data types.  big_t and code_t are separate names for the same
   underlying type so that the counting arithmetic and the bit-pattern
   arithmetic can be retargeted independently. */
|
||||
typedef unsigned long long big_t; /* type for code counting; the all-ones value ((big_t)0 - 1) is used by count() and main() as the overflow marker */
|
||||
typedef unsigned long long code_t; /* type for bit pattern counting; used for the shifts that compute "most" in count() and examine() */
|
||||
struct tab { /* type for been here check: one growable bit vector per (syms, left, len) state, see beenhere() */
|
||||
size_t len; /* length of bit vector in char's; zero means no vector has been allocated yet */
|
||||
char *vec; /* allocated bit vector; created and enlarged by beenhere(), released by cleanup() */
|
||||
};
|
||||
|
||||
/* The array for saving results, num[], is indexed with this triplet:
|
||||
|
||||
syms: number of symbols remaining to code
|
||||
left: number of available bit patterns at length len
|
||||
len: number of bits in the codes currently being assigned
|
||||
|
||||
Those indices are constrained thusly when saving results:
|
||||
|
||||
syms: 3..totsym (totsym == total symbols to code)
|
||||
left: 2..syms - 1, but only the evens (so syms == 8 -> 2, 4, 6)
|
||||
len: 1..max - 1 (max == maximum code length in bits)
|
||||
|
||||
syms == 2 is not saved since that immediately leads to a single code. left
|
||||
must be even, since it represents the number of available bit patterns at
|
||||
the current length, which is double the number at the previous length.
|
||||
left ends at syms-1 since left == syms immediately results in a single code.
|
||||
(left > syms is not allowed since that would result in an incomplete code.)
|
||||
len is less than max, since the code completes immediately when len == max.
|
||||
|
||||
The offset into the array is calculated for the three indices with the
|
||||
first one (syms) being outermost, and the last one (len) being innermost.
|
||||
We build the array with length max-1 lists for the len index, with syms-3
|
||||
of those for each symbol. There are totsym-2 of those, with each one
|
||||
varying in length as a function of syms. See the calculation of index in
|
||||
count() for the index, and the calculation of size in main() for the size
|
||||
of the array.
|
||||
|
||||
For the deflate example of 286 symbols limited to 15-bit codes, the array
|
||||
has 284,284 entries, taking up 2.17 MB for an 8-byte big_t. More than
|
||||
half of the space allocated for saved results is actually used -- not all
|
||||
possible triplets are reached in the generation of valid Huffman codes.
|
||||
*/
|
||||
|
||||
/* The array for tracking visited states, done[], is itself indexed identically
|
||||
to the num[] array as described above for the (syms, left, len) triplet.
|
||||
Each element in the array is further indexed by the (mem, rem) doublet,
|
||||
where mem is the amount of inflate table space used so far, and rem is the
|
||||
remaining unused entries in the current inflate sub-table. Each indexed
|
||||
element is simply one bit indicating whether the state has been visited or
|
||||
not. Since the ranges for mem and rem are not known a priori, each bit
|
||||
vector is of a variable size, and grows as needed to accommodate the visited
|
||||
states. mem and rem are used to calculate a single index in a triangular
|
||||
array. Since the range of mem is expected in the default case to be about
|
||||
ten times larger than the range of rem, the array is skewed to reduce the
|
||||
memory usage, with eight times the range for mem than for rem. See the
|
||||
calculations for offset and bit in beenhere() for the details.
|
||||
|
||||
For the deflate example of 286 symbols limited to 15-bit codes, the bit
|
||||
vectors grow to total approximately 21 MB, in addition to the 4.3 MB done[]
|
||||
array itself.
|
||||
*/
|
||||
|
||||
/* Globals to avoid propagating constants or constant pointers recursively.
   All of them are assigned in main() before count(), enough(), examine(), or
   beenhere() run. */
|
||||
local int max; /* maximum allowed bit length for the codes; defaults to 15 and is clamped to syms - 1 in main() */
|
||||
local int root; /* size of base code table in bits; defaults to 9 and is reduced to max in main() when larger */
|
||||
local int large; /* largest code table so far; reset to 1 << root by enough() and raised by examine() */
|
||||
local size_t size; /* number of elements in num and done; computed in main(), read by cleanup() */
|
||||
local int *code; /* number of symbols assigned to each bit length; max + 1 entries allocated in main() */
|
||||
local big_t *num; /* saved results array for code counting; indexed with INDEX(), filled in by count() */
|
||||
local struct tab *done; /* states already evaluated array; indexed with INDEX(), bit vectors managed by beenhere() */
|
||||
|
||||
/* Index function for num[] and done[].  Maps the triplet (i, j, k), that is
   (syms, left, len), to a flat array offset with syms outermost and len
   innermost.  Reads the global max at the point of use.

   Every parameter is parenthesized so that an argument containing an operator
   of lower precedence than the ones used here (for example a | b or
   c ? x : y) still yields the intended slot.  Note that i is evaluated twice,
   so arguments must not have side effects. */
#define INDEX(i,j,k) \
    (((size_t)(((i)-1)>>1)*(((i)-2)>>1)+((j)>>1)-1)*(max-1)+(k)-1)
|
||||
|
||||
/* Free allocated space. Uses globals code, num, and done. */
|
||||
local void cleanup(void)
|
||||
{
|
||||
size_t n;
|
||||
|
||||
if (done != NULL) {
|
||||
for (n = 0; n < size; n++)
|
||||
if (done[n].len)
|
||||
free(done[n].vec);
|
||||
free(done);
|
||||
}
|
||||
if (num != NULL)
|
||||
free(num);
|
||||
if (code != NULL)
|
||||
free(code);
|
||||
}
|
||||
|
||||
/* Return the number of possible Huffman codes using bit patterns of lengths
|
||||
len through max inclusive, coding syms symbols, with left bit patterns of
|
||||
length len unused -- return -1 if there is an overflow in the counting.
|
||||
Keep a record of previous results in num to prevent repeating the same
|
||||
calculation. Uses the globals max and num. */
|
||||
local big_t count(int syms, int len, int left)
|
||||
{
|
||||
big_t sum; /* number of possible codes from this juncture */
|
||||
big_t got; /* value returned from count() */
|
||||
int least; /* least number of syms to use at this juncture */
|
||||
int most; /* most number of syms to use at this juncture */
|
||||
int use; /* number of bit patterns to use in next call */
|
||||
size_t index; /* index of this case in *num */
|
||||
|
||||
/* see if only one possible code */
|
||||
if (syms == left)
|
||||
return 1;
|
||||
|
||||
/* note and verify the expected state */
|
||||
assert(syms > left && left > 0 && len < max);
|
||||
|
||||
/* see if we've done this one already */
|
||||
index = INDEX(syms, left, len);
|
||||
got = num[index];
|
||||
if (got)
|
||||
return got; /* we have -- return the saved result */
|
||||
|
||||
/* we need to use at least this many bit patterns so that the code won't be
|
||||
incomplete at the next length (more bit patterns than symbols) */
|
||||
least = (left << 1) - syms;
|
||||
if (least < 0)
|
||||
least = 0;
|
||||
|
||||
/* we can use at most this many bit patterns, lest there not be enough
|
||||
available for the remaining symbols at the maximum length (if there were
|
||||
no limit to the code length, this would become: most = left - 1) */
|
||||
most = (((code_t)left << (max - len)) - syms) /
|
||||
(((code_t)1 << (max - len)) - 1);
|
||||
|
||||
/* count all possible codes from this juncture and add them up */
|
||||
sum = 0;
|
||||
for (use = least; use <= most; use++) {
|
||||
got = count(syms - use, len + 1, (left - use) << 1);
|
||||
sum += got;
|
||||
if (got == (big_t)0 - 1 || sum < got) /* overflow */
|
||||
return (big_t)0 - 1;
|
||||
}
|
||||
|
||||
/* verify that all recursive calls are productive */
|
||||
assert(sum != 0);
|
||||
|
||||
/* save the result and return it */
|
||||
num[index] = sum;
|
||||
return sum;
|
||||
}
|
||||
|
||||
/* Return true if we've been here before, set to true if not. Set a bit in a
|
||||
bit vector to indicate visiting this state. Each (syms,len,left) state
|
||||
has a variable size bit vector indexed by (mem,rem). The bit vector is
|
||||
lengthened if needed to allow setting the (mem,rem) bit. */
|
||||
local int beenhere(int syms, int len, int left, int mem, int rem)
|
||||
{
|
||||
size_t index; /* index for this state's bit vector */
|
||||
size_t offset; /* offset in this state's bit vector */
|
||||
int bit; /* mask for this state's bit */
|
||||
size_t length; /* length of the bit vector in bytes */
|
||||
char *vector; /* new or enlarged bit vector */
|
||||
|
||||
/* point to vector for (syms,left,len), bit in vector for (mem,rem) */
|
||||
index = INDEX(syms, left, len);
|
||||
mem -= 1 << root;
|
||||
offset = (mem >> 3) + rem;
|
||||
offset = ((offset * (offset + 1)) >> 1) + rem;
|
||||
bit = 1 << (mem & 7);
|
||||
|
||||
/* see if we've been here */
|
||||
length = done[index].len;
|
||||
if (offset < length && (done[index].vec[offset] & bit) != 0)
|
||||
return 1; /* done this! */
|
||||
|
||||
/* we haven't been here before -- set the bit to show we have now */
|
||||
|
||||
/* see if we need to lengthen the vector in order to set the bit */
|
||||
if (length <= offset) {
|
||||
/* if we have one already, enlarge it, zero out the appended space */
|
||||
if (length) {
|
||||
do {
|
||||
length <<= 1;
|
||||
} while (length <= offset);
|
||||
vector = realloc(done[index].vec, length);
|
||||
if (vector != NULL)
|
||||
memset(vector + done[index].len, 0, length - done[index].len);
|
||||
}
|
||||
|
||||
/* otherwise we need to make a new vector and zero it out */
|
||||
else {
|
||||
length = 1 << (len - root);
|
||||
while (length <= offset)
|
||||
length <<= 1;
|
||||
vector = calloc(length, sizeof(char));
|
||||
}
|
||||
|
||||
/* in either case, bail if we can't get the memory */
|
||||
if (vector == NULL) {
|
||||
fputs("abort: unable to allocate enough memory\n", stderr);
|
||||
cleanup();
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/* install the new vector */
|
||||
done[index].len = length;
|
||||
done[index].vec = vector;
|
||||
}
|
||||
|
||||
/* set the bit */
|
||||
done[index].vec[offset] |= bit;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Examine all possible codes from the given node (syms, len, left). Compute
|
||||
the amount of memory required to build inflate's decoding tables, where the
|
||||
number of code structures used so far is mem, and the number remaining in
|
||||
the current sub-table is rem. Uses the globals max, code, root, large, and
|
||||
done. */
|
||||
local void examine(int syms, int len, int left, int mem, int rem)
|
||||
{
|
||||
int least; /* least number of syms to use at this juncture */
|
||||
int most; /* most number of syms to use at this juncture */
|
||||
int use; /* number of bit patterns to use in next call */
|
||||
|
||||
/* see if we have a complete code */
|
||||
if (syms == left) {
|
||||
/* set the last code entry */
|
||||
code[len] = left;
|
||||
|
||||
/* complete computation of memory used by this code */
|
||||
while (rem < left) {
|
||||
left -= rem;
|
||||
rem = 1 << (len - root);
|
||||
mem += rem;
|
||||
}
|
||||
assert(rem == left);
|
||||
|
||||
/* if this is a new maximum, show the entries used and the sub-code */
|
||||
if (mem > large) {
|
||||
large = mem;
|
||||
printf("max %d: ", mem);
|
||||
for (use = root + 1; use <= max; use++)
|
||||
if (code[use])
|
||||
printf("%d[%d] ", code[use], use);
|
||||
putchar('\n');
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
/* remove entries as we drop back down in the recursion */
|
||||
code[len] = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
/* prune the tree if we can */
|
||||
if (beenhere(syms, len, left, mem, rem))
|
||||
return;
|
||||
|
||||
/* we need to use at least this many bit patterns so that the code won't be
|
||||
incomplete at the next length (more bit patterns than symbols) */
|
||||
least = (left << 1) - syms;
|
||||
if (least < 0)
|
||||
least = 0;
|
||||
|
||||
/* we can use at most this many bit patterns, lest there not be enough
|
||||
available for the remaining symbols at the maximum length (if there were
|
||||
no limit to the code length, this would become: most = left - 1) */
|
||||
most = (((code_t)left << (max - len)) - syms) /
|
||||
(((code_t)1 << (max - len)) - 1);
|
||||
|
||||
/* occupy least table spaces, creating new sub-tables as needed */
|
||||
use = least;
|
||||
while (rem < use) {
|
||||
use -= rem;
|
||||
rem = 1 << (len - root);
|
||||
mem += rem;
|
||||
}
|
||||
rem -= use;
|
||||
|
||||
/* examine codes from here, updating table space as we go */
|
||||
for (use = least; use <= most; use++) {
|
||||
code[len] = use;
|
||||
examine(syms - use, len + 1, (left - use) << 1,
|
||||
mem + (rem ? 1 << (len - root) : 0), rem << 1);
|
||||
if (rem == 0) {
|
||||
rem = 1 << (len - root);
|
||||
mem += rem;
|
||||
}
|
||||
rem--;
|
||||
}
|
||||
|
||||
/* remove entries as we drop back down in the recursion */
|
||||
code[len] = 0;
|
||||
}
|
||||
|
||||
/* Look at all sub-codes starting with root + 1 bits. Look at only the valid
|
||||
intermediate code states (syms, left, len). For each completed code,
|
||||
calculate the amount of memory required by inflate to build the decoding
|
||||
tables. Find the maximum amount of memory required and show the code that
|
||||
requires that maximum. Uses the globals max, root, and num. */
|
||||
local void enough(int syms)
|
||||
{
|
||||
int n; /* number of remaing symbols for this node */
|
||||
int left; /* number of unused bit patterns at this length */
|
||||
size_t index; /* index of this case in *num */
|
||||
|
||||
/* clear code */
|
||||
for (n = 0; n <= max; n++)
|
||||
code[n] = 0;
|
||||
|
||||
/* look at all (root + 1) bit and longer codes */
|
||||
large = 1 << root; /* base table */
|
||||
if (root < max) /* otherwise, there's only a base table */
|
||||
for (n = 3; n <= syms; n++)
|
||||
for (left = 2; left < n; left += 2)
|
||||
{
|
||||
/* look at all reachable (root + 1) bit nodes, and the
|
||||
resulting codes (complete at root + 2 or more) */
|
||||
index = INDEX(n, left, root + 1);
|
||||
if (root + 1 < max && num[index]) /* reachable node */
|
||||
examine(n, root + 1, left, 1 << root, 0);
|
||||
|
||||
/* also look at root bit codes with completions at root + 1
|
||||
bits (not saved in num, since complete), just in case */
|
||||
if (num[index - 1] && n <= left << 1)
|
||||
examine((n - left) << 1, root + 1, (n - left) << 1,
|
||||
1 << root, 0);
|
||||
}
|
||||
|
||||
/* done */
|
||||
printf("done: maximum of %d table entries\n", large);
|
||||
}
|
||||
|
||||
/*
|
||||
Examine and show the total number of possible Huffman codes for a given
|
||||
maximum number of symbols, initial root table size, and maximum code length
|
||||
in bits -- those are the command arguments in that order. The default
|
||||
values are 286, 9, and 15 respectively, for the deflate literal/length code.
|
||||
The possible codes are counted for each number of coded symbols from two to
|
||||
the maximum. The counts for each of those and the total number of codes are
|
||||
shown. The maximum number of inflate table entires is then calculated
|
||||
across all possible codes. Each new maximum number of table entries and the
|
||||
associated sub-code (starting at root + 1 == 10 bits) is shown.
|
||||
|
||||
To count and examine Huffman codes that are not length-limited, provide a
|
||||
maximum length equal to the number of symbols minus one.
|
||||
|
||||
For the deflate literal/length code, use "enough". For the deflate distance
|
||||
code, use "enough 30 6".
|
||||
|
||||
This uses the %llu printf format to print big_t numbers, which assumes that
|
||||
big_t is an unsigned long long. If the big_t type is changed (for example
|
||||
to a multiple precision type), the method of printing will also need to be
|
||||
updated.
|
||||
*/
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int syms; /* total number of symbols to code */
|
||||
int n; /* number of symbols to code for this run */
|
||||
big_t got; /* return value of count() */
|
||||
big_t sum; /* accumulated number of codes over n */
|
||||
code_t word; /* for counting bits in code_t */
|
||||
|
||||
/* set up globals for cleanup() */
|
||||
code = NULL;
|
||||
num = NULL;
|
||||
done = NULL;
|
||||
|
||||
/* get arguments -- default to the deflate literal/length code */
|
||||
syms = 286;
|
||||
root = 9;
|
||||
max = 15;
|
||||
if (argc > 1) {
|
||||
syms = atoi(argv[1]);
|
||||
if (argc > 2) {
|
||||
root = atoi(argv[2]);
|
||||
if (argc > 3)
|
||||
max = atoi(argv[3]);
|
||||
}
|
||||
}
|
||||
if (argc > 4 || syms < 2 || root < 1 || max < 1) {
|
||||
fputs("invalid arguments, need: [sym >= 2 [root >= 1 [max >= 1]]]\n",
|
||||
stderr);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* if not restricting the code length, the longest is syms - 1 */
|
||||
if (max > syms - 1)
|
||||
max = syms - 1;
|
||||
|
||||
/* determine the number of bits in a code_t */
|
||||
for (n = 0, word = 1; word; n++, word <<= 1)
|
||||
;
|
||||
|
||||
/* make sure that the calculation of most will not overflow */
|
||||
if (max > n || (code_t)(syms - 2) >= (((code_t)0 - 1) >> (max - 1))) {
|
||||
fputs("abort: code length too long for internal types\n", stderr);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* reject impossible code requests */
|
||||
if ((code_t)(syms - 1) > ((code_t)1 << max) - 1) {
|
||||
fprintf(stderr, "%d symbols cannot be coded in %d bits\n",
|
||||
syms, max);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* allocate code vector */
|
||||
code = calloc(max + 1, sizeof(int));
|
||||
if (code == NULL) {
|
||||
fputs("abort: unable to allocate enough memory\n", stderr);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* determine size of saved results array, checking for overflows,
|
||||
allocate and clear the array (set all to zero with calloc()) */
|
||||
if (syms == 2) /* iff max == 1 */
|
||||
num = NULL; /* won't be saving any results */
|
||||
else {
|
||||
size = syms >> 1;
|
||||
if (size > ((size_t)0 - 1) / (n = (syms - 1) >> 1) ||
|
||||
(size *= n, size > ((size_t)0 - 1) / (n = max - 1)) ||
|
||||
(size *= n, size > ((size_t)0 - 1) / sizeof(big_t)) ||
|
||||
(num = calloc(size, sizeof(big_t))) == NULL) {
|
||||
fputs("abort: unable to allocate enough memory\n", stderr);
|
||||
cleanup();
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* count possible codes for all numbers of symbols, add up counts */
|
||||
sum = 0;
|
||||
for (n = 2; n <= syms; n++) {
|
||||
got = count(n, 1, 2);
|
||||
sum += got;
|
||||
if (got == (big_t)0 - 1 || sum < got) { /* overflow */
|
||||
fputs("abort: can't count that high!\n", stderr);
|
||||
cleanup();
|
||||
return 1;
|
||||
}
|
||||
printf("%llu %d-codes\n", got, n);
|
||||
}
|
||||
printf("%llu total codes for 2 to %d symbols", sum, syms);
|
||||
if (max < syms - 1)
|
||||
printf(" (%d-bit length limit)\n", max);
|
||||
else
|
||||
puts(" (no length limit)");
|
||||
|
||||
/* allocate and clear done array for beenhere() */
|
||||
if (syms == 2)
|
||||
done = NULL;
|
||||
else if (size > ((size_t)0 - 1) / sizeof(struct tab) ||
|
||||
(done = calloc(size, sizeof(struct tab))) == NULL) {
|
||||
fputs("abort: unable to allocate enough memory\n", stderr);
|
||||
cleanup();
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* find and show maximum inflate table usage */
|
||||
if (root > max) /* reduce root to max length */
|
||||
root = max;
|
||||
if ((code_t)syms < ((code_t)1 << (root + 1)))
|
||||
enough(syms);
|
||||
else
|
||||
puts("cannot handle minimum code lengths > root");
|
||||
|
||||
/* done */
|
||||
cleanup();
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,233 @@
|
|||
/* fitblk.c: example of fitting compressed output to a specified size
|
||||
Not copyrighted -- provided to the public domain
|
||||
Version 1.1 25 November 2004 Mark Adler */
|
||||
|
||||
/* Version history:
|
||||
1.0 24 Nov 2004 First version
|
||||
1.1 25 Nov 2004 Change deflateInit2() to deflateInit()
|
||||
Use fixed-size, stack-allocated raw buffers
|
||||
Simplify code moving compression to subroutines
|
||||
Use assert() for internal errors
|
||||
Add detailed description of approach
|
||||
*/
|
||||
|
||||
/* Approach to just fitting a requested compressed size:
|
||||
|
||||
fitblk performs three compression passes on a portion of the input
|
||||
data in order to determine how much of that input will compress to
|
||||
nearly the requested output block size. The first pass generates
|
||||
enough deflate blocks to produce output to fill the requested
|
||||
output size plus a specified excess amount (see the EXCESS define
|
||||
below). The last deflate block may go quite a bit past that, but
|
||||
is discarded. The second pass decompresses and recompresses just
|
||||
the compressed data that fit in the requested plus excess sized
|
||||
buffer. The deflate process is terminated after that amount of
|
||||
input, which is less than the amount consumed on the first pass.
|
||||
The last deflate block of the result will be of a comparable size
|
||||
to the final product, so that the header for that deflate block and
|
||||
the compression ratio for that block will be about the same as in
|
||||
the final product. The third compression pass decompresses the
|
||||
result of the second step, but only the compressed data up to the
|
||||
requested size minus an amount to allow the compressed stream to
|
||||
complete (see the MARGIN define below). That will result in a
|
||||
final compressed stream whose length is less than or equal to the
|
||||
requested size. Assuming sufficient input and a requested size
|
||||
greater than a few hundred bytes, the shortfall will typically be
|
||||
less than ten bytes.
|
||||
|
||||
If the input is short enough that the first compression completes
|
||||
before filling the requested output size, then that compressed
|
||||
stream is returned with no recompression.
|
||||
|
||||
EXCESS is chosen to be just greater than the shortfall seen in a
|
||||
two pass approach similar to the above. That shortfall is due to
|
||||
the last deflate block compressing more efficiently with a smaller
|
||||
header on the second pass. EXCESS is set to be large enough so
|
||||
that there is enough uncompressed data for the second pass to fill
|
||||
out the requested size, and small enough so that the final deflate
|
||||
block of the second pass will be close in size to the final deflate
|
||||
block of the third and final pass. MARGIN is chosen to be just
|
||||
large enough to assure that the final compression has enough room
|
||||
to complete in all cases.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include "zlib.h"
|
||||
|
||||
#define local static
|
||||
|
||||
/* print nastygram and leave */
|
||||
local void quit(char *why)
|
||||
{
|
||||
fprintf(stderr, "fitblk abort: %s\n", why);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
#define RAWLEN 4096 /* intermediate uncompressed buffer size */
|
||||
|
||||
/* compress from file to def until provided buffer is full or end of
|
||||
input reached; return last deflate() return value, or Z_ERRNO if
|
||||
there was read error on the file */
|
||||
local int partcompress(FILE *in, z_streamp def)
|
||||
{
|
||||
int ret, flush;
|
||||
unsigned char raw[RAWLEN];
|
||||
|
||||
flush = Z_NO_FLUSH;
|
||||
do {
|
||||
def->avail_in = fread(raw, 1, RAWLEN, in);
|
||||
if (ferror(in))
|
||||
return Z_ERRNO;
|
||||
def->next_in = raw;
|
||||
if (feof(in))
|
||||
flush = Z_FINISH;
|
||||
ret = deflate(def, flush);
|
||||
assert(ret != Z_STREAM_ERROR);
|
||||
} while (def->avail_out != 0 && flush == Z_NO_FLUSH);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* recompress from inf's input to def's output; the input for inf and
|
||||
the output for def are set in those structures before calling;
|
||||
return last deflate() return value, or Z_MEM_ERROR if inflate()
|
||||
was not able to allocate enough memory when it needed to */
|
||||
local int recompress(z_streamp inf, z_streamp def)
|
||||
{
|
||||
int ret, flush;
|
||||
unsigned char raw[RAWLEN];
|
||||
|
||||
flush = Z_NO_FLUSH;
|
||||
do {
|
||||
/* decompress */
|
||||
inf->avail_out = RAWLEN;
|
||||
inf->next_out = raw;
|
||||
ret = inflate(inf, Z_NO_FLUSH);
|
||||
assert(ret != Z_STREAM_ERROR && ret != Z_DATA_ERROR &&
|
||||
ret != Z_NEED_DICT);
|
||||
if (ret == Z_MEM_ERROR)
|
||||
return ret;
|
||||
|
||||
/* compress what was decompresed until done or no room */
|
||||
def->avail_in = RAWLEN - inf->avail_out;
|
||||
def->next_in = raw;
|
||||
if (inf->avail_out != 0)
|
||||
flush = Z_FINISH;
|
||||
ret = deflate(def, flush);
|
||||
assert(ret != Z_STREAM_ERROR);
|
||||
} while (ret != Z_STREAM_END && def->avail_out != 0);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#define EXCESS 256 /* empirically determined stream overage */
|
||||
#define MARGIN 8 /* amount to back off for completion */
|
||||
|
||||
/* compress from stdin to fixed-size block on stdout */
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int ret; /* return code */
|
||||
unsigned size; /* requested fixed output block size */
|
||||
unsigned have; /* bytes written by deflate() call */
|
||||
unsigned char *blk; /* intermediate and final stream */
|
||||
unsigned char *tmp; /* close to desired size stream */
|
||||
z_stream def, inf; /* zlib deflate and inflate states */
|
||||
|
||||
/* get requested output size */
|
||||
if (argc != 2)
|
||||
quit("need one argument: size of output block");
|
||||
ret = strtol(argv[1], argv + 1, 10);
|
||||
if (argv[1][0] != 0)
|
||||
quit("argument must be a number");
|
||||
if (ret < 8) /* 8 is minimum zlib stream size */
|
||||
quit("need positive size of 8 or greater");
|
||||
size = (unsigned)ret;
|
||||
|
||||
/* allocate memory for buffers and compression engine */
|
||||
blk = malloc(size + EXCESS);
|
||||
def.zalloc = Z_NULL;
|
||||
def.zfree = Z_NULL;
|
||||
def.opaque = Z_NULL;
|
||||
ret = deflateInit(&def, Z_DEFAULT_COMPRESSION);
|
||||
if (ret != Z_OK || blk == NULL)
|
||||
quit("out of memory");
|
||||
|
||||
/* compress from stdin until output full, or no more input */
|
||||
def.avail_out = size + EXCESS;
|
||||
def.next_out = blk;
|
||||
ret = partcompress(stdin, &def);
|
||||
if (ret == Z_ERRNO)
|
||||
quit("error reading input");
|
||||
|
||||
/* if it all fit, then size was undersubscribed -- done! */
|
||||
if (ret == Z_STREAM_END && def.avail_out >= EXCESS) {
|
||||
/* write block to stdout */
|
||||
have = size + EXCESS - def.avail_out;
|
||||
if (fwrite(blk, 1, have, stdout) != have || ferror(stdout))
|
||||
quit("error writing output");
|
||||
|
||||
/* clean up and print results to stderr */
|
||||
ret = deflateEnd(&def);
|
||||
assert(ret != Z_STREAM_ERROR);
|
||||
free(blk);
|
||||
fprintf(stderr,
|
||||
"%u bytes unused out of %u requested (all input)\n",
|
||||
size - have, size);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* it didn't all fit -- set up for recompression */
|
||||
inf.zalloc = Z_NULL;
|
||||
inf.zfree = Z_NULL;
|
||||
inf.opaque = Z_NULL;
|
||||
inf.avail_in = 0;
|
||||
inf.next_in = Z_NULL;
|
||||
ret = inflateInit(&inf);
|
||||
tmp = malloc(size + EXCESS);
|
||||
if (ret != Z_OK || tmp == NULL)
|
||||
quit("out of memory");
|
||||
ret = deflateReset(&def);
|
||||
assert(ret != Z_STREAM_ERROR);
|
||||
|
||||
/* do first recompression close to the right amount */
|
||||
inf.avail_in = size + EXCESS;
|
||||
inf.next_in = blk;
|
||||
def.avail_out = size + EXCESS;
|
||||
def.next_out = tmp;
|
||||
ret = recompress(&inf, &def);
|
||||
if (ret == Z_MEM_ERROR)
|
||||
quit("out of memory");
|
||||
|
||||
/* set up for next reocmpression */
|
||||
ret = inflateReset(&inf);
|
||||
assert(ret != Z_STREAM_ERROR);
|
||||
ret = deflateReset(&def);
|
||||
assert(ret != Z_STREAM_ERROR);
|
||||
|
||||
/* do second and final recompression (third compression) */
|
||||
inf.avail_in = size - MARGIN; /* assure stream will complete */
|
||||
inf.next_in = tmp;
|
||||
def.avail_out = size;
|
||||
def.next_out = blk;
|
||||
ret = recompress(&inf, &def);
|
||||
if (ret == Z_MEM_ERROR)
|
||||
quit("out of memory");
|
||||
assert(ret == Z_STREAM_END); /* otherwise MARGIN too small */
|
||||
|
||||
/* done -- write block to stdout */
|
||||
have = size - def.avail_out;
|
||||
if (fwrite(blk, 1, have, stdout) != have || ferror(stdout))
|
||||
quit("error writing output");
|
||||
|
||||
/* clean up and print results to stderr */
|
||||
free(tmp);
|
||||
ret = inflateEnd(&inf);
|
||||
assert(ret != Z_STREAM_ERROR);
|
||||
ret = deflateEnd(&def);
|
||||
assert(ret != Z_STREAM_ERROR);
|
||||
free(blk);
|
||||
fprintf(stderr,
|
||||
"%u bytes unused out of %u requested (%lu input)\n",
|
||||
size - have, size, def.total_in);
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,702 @@
|
|||
/* gun.c -- simple gunzip to give an example of the use of inflateBack()
|
||||
* Copyright (C) 2003, 2005, 2008, 2010, 2012 Mark Adler
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
Version 1.7 12 August 2012 Mark Adler */
|
||||
|
||||
/* Version history:
|
||||
1.0 16 Feb 2003 First version for testing of inflateBack()
|
||||
1.1 21 Feb 2005 Decompress concatenated gzip streams
|
||||
Remove use of "this" variable (C++ keyword)
|
||||
Fix return value for in()
|
||||
Improve allocation failure checking
|
||||
Add typecasting for void * structures
|
||||
Add -h option for command version and usage
|
||||
Add a bunch of comments
|
||||
1.2 20 Mar 2005 Add Unix compress (LZW) decompression
|
||||
Copy file attributes from input file to output file
|
||||
1.3 12 Jun 2005 Add casts for error messages [Oberhumer]
|
||||
1.4 8 Dec 2006 LZW decompression speed improvements
|
||||
1.5 9 Feb 2008 Avoid warning in latest version of gcc
|
||||
1.6 17 Jan 2010 Avoid signed/unsigned comparison warnings
|
||||
1.7 12 Aug 2012 Update for z_const usage in zlib 1.2.8
|
||||
*/
|
||||
|
||||
/*
|
||||
gun [ -t ] [ name ... ]
|
||||
|
||||
decompresses the data in the named gzip files. If no arguments are given,
|
||||
gun will decompress from stdin to stdout. The names must end in .gz, -gz,
|
||||
.z, -z, _z, or .Z. The uncompressed data will be written to a file name
|
||||
with the suffix stripped. On success, the original file is deleted. On
|
||||
failure, the output file is deleted. For most failures, the command will
|
||||
continue to process the remaining names on the command line. A memory
|
||||
allocation failure will abort the command. If -t is specified, then the
|
||||
listed files or stdin will be tested as gzip files for integrity (without
|
||||
checking for a proper suffix), no output will be written, and no files
|
||||
will be deleted.
|
||||
|
||||
Like gzip, gun allows concatenated gzip streams and will decompress them,
|
||||
writing all of the uncompressed data to the output. Unlike gzip, gun allows
|
||||
an empty file on input, and will produce no error writing an empty output
|
||||
file.
|
||||
|
||||
gun will also decompress files made by Unix compress, which uses LZW
|
||||
compression. These files are automatically detected by virtue of their
|
||||
magic header bytes. Since the end of Unix compress stream is marked by the
|
||||
end-of-file, they cannot be concatenated. If a Unix compress stream is
|
||||
encountered in an input file, it is the last stream in that file.
|
||||
|
||||
Like gunzip and uncompress, the file attributes of the original compressed
|
||||
file are maintained in the final uncompressed file, to the extent that the
|
||||
user permissions allow it.
|
||||
|
||||
On my Mac OS X PowerPC G4, gun is almost twice as fast as gunzip (version
|
||||
1.2.4) is on the same file, when gun is linked with zlib 1.2.2. Also the
|
||||
LZW decompression provided by gun is about twice as fast as the standard
|
||||
Unix uncompress command.
|
||||
*/
|
||||
|
||||
/* external functions and related types and constants */
|
||||
#include <stdio.h> /* fprintf() */
|
||||
#include <stdlib.h> /* malloc(), free() */
|
||||
#include <string.h> /* strerror(), strcmp(), strlen(), memcpy() */
|
||||
#include <errno.h> /* errno */
|
||||
#include <fcntl.h> /* open() */
|
||||
#include <unistd.h> /* read(), write(), close(), chown(), unlink() */
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h> /* stat(), chmod() */
|
||||
#include <utime.h> /* utime() */
|
||||
#include "zlib.h" /* inflateBackInit(), inflateBack(), */
|
||||
/* inflateBackEnd(), crc32() */
|
||||
|
||||
/* function declaration */
|
||||
#define local static
|
||||
|
||||
/* buffer constants */
|
||||
#define SIZE 32768U /* input and output buffer sizes */
|
||||
#define PIECE 16384 /* limits i/o chunks for 16-bit int case */
|
||||
|
||||
/* Input descriptor handed to inflateBack() and passed on to the input
   function in(): the file to read from and the buffer of SIZE bytes that
   in() fills. */
struct ind {
    int infile;                 /* file descriptor to read from */
    unsigned char *inbuf;       /* start of the input buffer */
};
|
||||
|
||||
/* Load input buffer, assumed to be empty, and return bytes loaded and a
|
||||
pointer to them. read() is called until the buffer is full, or until it
|
||||
returns end-of-file or error. Return 0 on error. */
|
||||
local unsigned in(void *in_desc, z_const unsigned char **buf)
|
||||
{
|
||||
int ret;
|
||||
unsigned len;
|
||||
unsigned char *next;
|
||||
struct ind *me = (struct ind *)in_desc;
|
||||
|
||||
next = me->inbuf;
|
||||
*buf = next;
|
||||
len = 0;
|
||||
do {
|
||||
ret = PIECE;
|
||||
if ((unsigned)ret > SIZE - len)
|
||||
ret = (int)(SIZE - len);
|
||||
ret = (int)read(me->infile, next, ret);
|
||||
if (ret == -1) {
|
||||
len = 0;
|
||||
break;
|
||||
}
|
||||
next += ret;
|
||||
len += ret;
|
||||
} while (ret != 0 && len < SIZE);
|
||||
return len;
|
||||
}
|
||||
|
||||
/* Output descriptor handed to inflateBack() and passed on to the output
   function out(): the file to write to, plus a running CRC-32 and byte
   count of everything output, which gunpipe() compares with the gzip
   trailer.  Only the low 32 bits of the count are compared, so it does not
   matter if total wraps on a 32-bit long. */
struct outd {
    int outfile;                /* file descriptor, or -1 to write nothing */
    int check;                  /* true if checking crc and total */
    unsigned long crc;          /* running CRC-32 of the output */
    unsigned long total;        /* running count of output bytes */
};
|
||||
|
||||
/* Write output buffer and update the CRC-32 and total bytes written. write()
|
||||
is called until all of the output is written or an error is encountered.
|
||||
On success out() returns 0. For a write failure, out() returns 1. If the
|
||||
output file descriptor is -1, then nothing is written.
|
||||
*/
|
||||
local int out(void *out_desc, unsigned char *buf, unsigned len)
|
||||
{
|
||||
int ret;
|
||||
struct outd *me = (struct outd *)out_desc;
|
||||
|
||||
if (me->check) {
|
||||
me->crc = crc32(me->crc, buf, len);
|
||||
me->total += len;
|
||||
}
|
||||
if (me->outfile != -1)
|
||||
do {
|
||||
ret = PIECE;
|
||||
if ((unsigned)ret > len)
|
||||
ret = (int)len;
|
||||
ret = (int)write(me->outfile, buf, ret);
|
||||
if (ret == -1)
|
||||
return 1;
|
||||
buf += ret;
|
||||
len -= ret;
|
||||
} while (len != 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Next input byte macro for use inside lunpipe() and gunpipe().  It relies
   on the locals have, next, last and indp of the enclosing function.  If
   no buffered input remains, in() is called to reload it.  The next byte
   is then stored in last and is also the value of the expression; when no
   input could be loaded, that value is -1. */
#define NEXT() (have == 0 ? (have = in(indp, &next)) : 0, \
                last = have != 0 ? (have--, (int)(*next++)) : -1)
|
||||
|
||||
/* memory for gunpipe() and lunpipe() --
|
||||
the first 256 entries of prefix[] and suffix[] are never used, could
|
||||
have offset the index, but it's faster to waste the memory */
|
||||
unsigned char inbuf[SIZE]; /* input buffer */
|
||||
unsigned char outbuf[SIZE]; /* output buffer */
|
||||
unsigned short prefix[65536]; /* index to LZW prefix string */
|
||||
unsigned char suffix[65536]; /* one-character LZW suffix */
|
||||
unsigned char match[65280 + 2]; /* buffer for reversed match or gzip
|
||||
32K sliding window */
|
||||
|
||||
/* Discard what is left of the current chunk of code bytes: the leftover
   bits in rem are dropped and the remaining chunk bytes of input are
   skipped, reloading the input buffer through NEXT() if the chunk extends
   past what is buffered.  Uses the locals left, rem, chunk, have and next
   of lunpipe().  (This is a vestigial aspect of the compressed data format
   derived from an implementation that made use of a special VAX machine
   instruction!) */
#define FLUSHCODE() \
    do { \
        left = 0; \
        rem = 0; \
        if (chunk > have) { \
            chunk -= have; \
            have = 0; \
            if (NEXT() == -1) \
                break; \
            chunk--; \
            if (chunk > have) { \
                chunk = have = 0; \
                break; \
            } \
        } \
        have -= chunk; \
        next += chunk; \
        chunk = 0; \
    } while (0)
|
||||
|
||||
/* Decompress a compress (LZW) file from indp to outfile. The compress magic
|
||||
header (two bytes) has already been read and verified. There are have bytes
|
||||
of buffered input at next. strm is used for passing error information back
|
||||
to gunpipe().
|
||||
|
||||
lunpipe() will return Z_OK on success, Z_BUF_ERROR for an unexpected end of
|
||||
file, read error, or write error (a write error indicated by strm->next_in
|
||||
not equal to Z_NULL), or Z_DATA_ERROR for invalid input.
|
||||
*/
|
||||
local int lunpipe(unsigned have, z_const unsigned char *next, struct ind *indp,
|
||||
int outfile, z_stream *strm)
|
||||
{
|
||||
int last; /* last byte read by NEXT(), or -1 if EOF */
|
||||
unsigned chunk; /* bytes left in current chunk */
|
||||
int left; /* bits left in rem */
|
||||
unsigned rem; /* unused bits from input */
|
||||
int bits; /* current bits per code */
|
||||
unsigned code; /* code, table traversal index */
|
||||
unsigned mask; /* mask for current bits codes */
|
||||
int max; /* maximum bits per code for this stream */
|
||||
unsigned flags; /* compress flags, then block compress flag */
|
||||
unsigned end; /* last valid entry in prefix/suffix tables */
|
||||
unsigned temp; /* current code */
|
||||
unsigned prev; /* previous code */
|
||||
unsigned final; /* last character written for previous code */
|
||||
unsigned stack; /* next position for reversed string */
|
||||
unsigned outcnt; /* bytes in output buffer */
|
||||
struct outd outd; /* output structure */
|
||||
unsigned char *p;
|
||||
|
||||
/* set up output */
|
||||
outd.outfile = outfile;
|
||||
outd.check = 0;
|
||||
|
||||
/* process remainder of compress header -- a flags byte */
|
||||
flags = NEXT();
|
||||
if (last == -1)
|
||||
return Z_BUF_ERROR;
|
||||
if (flags & 0x60) {
|
||||
strm->msg = (char *)"unknown lzw flags set";
|
||||
return Z_DATA_ERROR;
|
||||
}
|
||||
max = flags & 0x1f;
|
||||
if (max < 9 || max > 16) {
|
||||
strm->msg = (char *)"lzw bits out of range";
|
||||
return Z_DATA_ERROR;
|
||||
}
|
||||
if (max == 9) /* 9 doesn't really mean 9 */
|
||||
max = 10;
|
||||
flags &= 0x80; /* true if block compress */
|
||||
|
||||
/* clear table */
|
||||
bits = 9;
|
||||
mask = 0x1ff;
|
||||
end = flags ? 256 : 255;
|
||||
|
||||
/* set up: get first 9-bit code, which is the first decompressed byte, but
|
||||
don't create a table entry until the next code */
|
||||
if (NEXT() == -1) /* no compressed data is ok */
|
||||
return Z_OK;
|
||||
final = prev = (unsigned)last; /* low 8 bits of code */
|
||||
if (NEXT() == -1) /* missing a bit */
|
||||
return Z_BUF_ERROR;
|
||||
if (last & 1) { /* code must be < 256 */
|
||||
strm->msg = (char *)"invalid lzw code";
|
||||
return Z_DATA_ERROR;
|
||||
}
|
||||
rem = (unsigned)last >> 1; /* remaining 7 bits */
|
||||
left = 7;
|
||||
chunk = bits - 2; /* 7 bytes left in this chunk */
|
||||
outbuf[0] = (unsigned char)final; /* write first decompressed byte */
|
||||
outcnt = 1;
|
||||
|
||||
/* decode codes */
|
||||
stack = 0;
|
||||
for (;;) {
|
||||
/* if the table will be full after this, increment the code size */
|
||||
if (end >= mask && bits < max) {
|
||||
FLUSHCODE();
|
||||
bits++;
|
||||
mask <<= 1;
|
||||
mask++;
|
||||
}
|
||||
|
||||
/* get a code of length bits */
|
||||
if (chunk == 0) /* decrement chunk modulo bits */
|
||||
chunk = bits;
|
||||
code = rem; /* low bits of code */
|
||||
if (NEXT() == -1) { /* EOF is end of compressed data */
|
||||
/* write remaining buffered output */
|
||||
if (outcnt && out(&outd, outbuf, outcnt)) {
|
||||
strm->next_in = outbuf; /* signal write error */
|
||||
return Z_BUF_ERROR;
|
||||
}
|
||||
return Z_OK;
|
||||
}
|
||||
code += (unsigned)last << left; /* middle (or high) bits of code */
|
||||
left += 8;
|
||||
chunk--;
|
||||
if (bits > left) { /* need more bits */
|
||||
if (NEXT() == -1) /* can't end in middle of code */
|
||||
return Z_BUF_ERROR;
|
||||
code += (unsigned)last << left; /* high bits of code */
|
||||
left += 8;
|
||||
chunk--;
|
||||
}
|
||||
code &= mask; /* mask to current code length */
|
||||
left -= bits; /* number of unused bits */
|
||||
rem = (unsigned)last >> (8 - left); /* unused bits from last byte */
|
||||
|
||||
/* process clear code (256) */
|
||||
if (code == 256 && flags) {
|
||||
FLUSHCODE();
|
||||
bits = 9; /* initialize bits and mask */
|
||||
mask = 0x1ff;
|
||||
end = 255; /* empty table */
|
||||
continue; /* get next code */
|
||||
}
|
||||
|
||||
/* special code to reuse last match */
|
||||
temp = code; /* save the current code */
|
||||
if (code > end) {
|
||||
/* Be picky on the allowed code here, and make sure that the code
|
||||
we drop through (prev) will be a valid index so that random
|
||||
input does not cause an exception. The code != end + 1 check is
|
||||
empirically derived, and not checked in the original uncompress
|
||||
code. If this ever causes a problem, that check could be safely
|
||||
removed. Leaving this check in greatly improves gun's ability
|
||||
to detect random or corrupted input after a compress header.
|
||||
In any case, the prev > end check must be retained. */
|
||||
if (code != end + 1 || prev > end) {
|
||||
strm->msg = (char *)"invalid lzw code";
|
||||
return Z_DATA_ERROR;
|
||||
}
|
||||
match[stack++] = (unsigned char)final;
|
||||
code = prev;
|
||||
}
|
||||
|
||||
/* walk through linked list to generate output in reverse order */
|
||||
p = match + stack;
|
||||
while (code >= 256) {
|
||||
*p++ = suffix[code];
|
||||
code = prefix[code];
|
||||
}
|
||||
stack = p - match;
|
||||
match[stack++] = (unsigned char)code;
|
||||
final = code;
|
||||
|
||||
/* link new table entry */
|
||||
if (end < mask) {
|
||||
end++;
|
||||
prefix[end] = (unsigned short)prev;
|
||||
suffix[end] = (unsigned char)final;
|
||||
}
|
||||
|
||||
/* set previous code for next iteration */
|
||||
prev = temp;
|
||||
|
||||
/* write output in forward order */
|
||||
while (stack > SIZE - outcnt) {
|
||||
while (outcnt < SIZE)
|
||||
outbuf[outcnt++] = match[--stack];
|
||||
if (out(&outd, outbuf, outcnt)) {
|
||||
strm->next_in = outbuf; /* signal write error */
|
||||
return Z_BUF_ERROR;
|
||||
}
|
||||
outcnt = 0;
|
||||
}
|
||||
p = match + stack;
|
||||
do {
|
||||
outbuf[outcnt++] = *--p;
|
||||
} while (p > match);
|
||||
stack = 0;
|
||||
|
||||
/* loop for next code with final and prev as the last match, rem and
|
||||
left provide the first 0..7 bits of the next code, end is the last
|
||||
valid table entry */
|
||||
}
|
||||
}
|
||||
|
||||
/* Decompress a gzip file from infile to outfile. strm is assumed to have been
|
||||
successfully initialized with inflateBackInit(). The input file may consist
|
||||
of a series of gzip streams, in which case all of them will be decompressed
|
||||
to the output file. If outfile is -1, then the gzip stream(s) integrity is
|
||||
checked and nothing is written.
|
||||
|
||||
The return value is a zlib error code: Z_MEM_ERROR if out of memory,
|
||||
Z_DATA_ERROR if the header or the compressed data is invalid, or if the
|
||||
trailer CRC-32 check or length doesn't match, Z_BUF_ERROR if the input ends
|
||||
prematurely or a write error occurs, or Z_ERRNO if junk (not a another gzip
|
||||
stream) follows a valid gzip stream.
|
||||
*/
|
||||
local int gunpipe(z_stream *strm, int infile, int outfile)
|
||||
{
|
||||
int ret, first, last;
|
||||
unsigned have, flags, len;
|
||||
z_const unsigned char *next = NULL;
|
||||
struct ind ind, *indp;
|
||||
struct outd outd;
|
||||
|
||||
/* setup input buffer */
|
||||
ind.infile = infile;
|
||||
ind.inbuf = inbuf;
|
||||
indp = &ind;
|
||||
|
||||
/* decompress concatenated gzip streams */
|
||||
have = 0; /* no input data read in yet */
|
||||
first = 1; /* looking for first gzip header */
|
||||
strm->next_in = Z_NULL; /* so Z_BUF_ERROR means EOF */
|
||||
for (;;) {
|
||||
/* look for the two magic header bytes for a gzip stream */
|
||||
if (NEXT() == -1) {
|
||||
ret = Z_OK;
|
||||
break; /* empty gzip stream is ok */
|
||||
}
|
||||
if (last != 31 || (NEXT() != 139 && last != 157)) {
|
||||
strm->msg = (char *)"incorrect header check";
|
||||
ret = first ? Z_DATA_ERROR : Z_ERRNO;
|
||||
break; /* not a gzip or compress header */
|
||||
}
|
||||
first = 0; /* next non-header is junk */
|
||||
|
||||
/* process a compress (LZW) file -- can't be concatenated after this */
|
||||
if (last == 157) {
|
||||
ret = lunpipe(have, next, indp, outfile, strm);
|
||||
break;
|
||||
}
|
||||
|
||||
/* process remainder of gzip header */
|
||||
ret = Z_BUF_ERROR;
|
||||
if (NEXT() != 8) { /* only deflate method allowed */
|
||||
if (last == -1) break;
|
||||
strm->msg = (char *)"unknown compression method";
|
||||
ret = Z_DATA_ERROR;
|
||||
break;
|
||||
}
|
||||
flags = NEXT(); /* header flags */
|
||||
NEXT(); /* discard mod time, xflgs, os */
|
||||
NEXT();
|
||||
NEXT();
|
||||
NEXT();
|
||||
NEXT();
|
||||
NEXT();
|
||||
if (last == -1) break;
|
||||
if (flags & 0xe0) {
|
||||
strm->msg = (char *)"unknown header flags set";
|
||||
ret = Z_DATA_ERROR;
|
||||
break;
|
||||
}
|
||||
if (flags & 4) { /* extra field */
|
||||
len = NEXT();
|
||||
len += (unsigned)(NEXT()) << 8;
|
||||
if (last == -1) break;
|
||||
while (len > have) {
|
||||
len -= have;
|
||||
have = 0;
|
||||
if (NEXT() == -1) break;
|
||||
len--;
|
||||
}
|
||||
if (last == -1) break;
|
||||
have -= len;
|
||||
next += len;
|
||||
}
|
||||
if (flags & 8) /* file name */
|
||||
while (NEXT() != 0 && last != -1)
|
||||
;
|
||||
if (flags & 16) /* comment */
|
||||
while (NEXT() != 0 && last != -1)
|
||||
;
|
||||
if (flags & 2) { /* header crc */
|
||||
NEXT();
|
||||
NEXT();
|
||||
}
|
||||
if (last == -1) break;
|
||||
|
||||
/* set up output */
|
||||
outd.outfile = outfile;
|
||||
outd.check = 1;
|
||||
outd.crc = crc32(0L, Z_NULL, 0);
|
||||
outd.total = 0;
|
||||
|
||||
/* decompress data to output */
|
||||
strm->next_in = next;
|
||||
strm->avail_in = have;
|
||||
ret = inflateBack(strm, in, indp, out, &outd);
|
||||
if (ret != Z_STREAM_END) break;
|
||||
next = strm->next_in;
|
||||
have = strm->avail_in;
|
||||
strm->next_in = Z_NULL; /* so Z_BUF_ERROR means EOF */
|
||||
|
||||
/* check trailer */
|
||||
ret = Z_BUF_ERROR;
|
||||
if (NEXT() != (int)(outd.crc & 0xff) ||
|
||||
NEXT() != (int)((outd.crc >> 8) & 0xff) ||
|
||||
NEXT() != (int)((outd.crc >> 16) & 0xff) ||
|
||||
NEXT() != (int)((outd.crc >> 24) & 0xff)) {
|
||||
/* crc error */
|
||||
if (last != -1) {
|
||||
strm->msg = (char *)"incorrect data check";
|
||||
ret = Z_DATA_ERROR;
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (NEXT() != (int)(outd.total & 0xff) ||
|
||||
NEXT() != (int)((outd.total >> 8) & 0xff) ||
|
||||
NEXT() != (int)((outd.total >> 16) & 0xff) ||
|
||||
NEXT() != (int)((outd.total >> 24) & 0xff)) {
|
||||
/* length error */
|
||||
if (last != -1) {
|
||||
strm->msg = (char *)"incorrect length check";
|
||||
ret = Z_DATA_ERROR;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
/* go back and look for another gzip stream */
|
||||
}
|
||||
|
||||
/* clean up and return */
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Copy file attributes, from -> to, as best we can. This is best effort, so
|
||||
no errors are reported. The mode bits, including suid, sgid, and the sticky
|
||||
bit are copied (if allowed), the owner's user id and group id are copied
|
||||
(again if allowed), and the access and modify times are copied. */
|
||||
local void copymeta(char *from, char *to)
|
||||
{
|
||||
struct stat was;
|
||||
struct utimbuf when;
|
||||
|
||||
/* get all of from's Unix meta data, return if not a regular file */
|
||||
if (stat(from, &was) != 0 || (was.st_mode & S_IFMT) != S_IFREG)
|
||||
return;
|
||||
|
||||
/* set to's mode bits, ignore errors */
|
||||
(void)chmod(to, was.st_mode & 07777);
|
||||
|
||||
/* copy owner's user and group, ignore errors */
|
||||
(void)chown(to, was.st_uid, was.st_gid);
|
||||
|
||||
/* copy access and modify times, ignore errors */
|
||||
when.actime = was.st_atime;
|
||||
when.modtime = was.st_mtime;
|
||||
(void)utime(to, &when);
|
||||
}
|
||||
|
||||
/* Decompress the file inname to the file outnname, of if test is true, just
|
||||
decompress without writing and check the gzip trailer for integrity. If
|
||||
inname is NULL or an empty string, read from stdin. If outname is NULL or
|
||||
an empty string, write to stdout. strm is a pre-initialized inflateBack
|
||||
structure. When appropriate, copy the file attributes from inname to
|
||||
outname.
|
||||
|
||||
gunzip() returns 1 if there is an out-of-memory error or an unexpected
|
||||
return code from gunpipe(). Otherwise it returns 0.
|
||||
*/
|
||||
local int gunzip(z_stream *strm, char *inname, char *outname, int test)
|
||||
{
|
||||
int ret;
|
||||
int infile, outfile;
|
||||
|
||||
/* open files */
|
||||
if (inname == NULL || *inname == 0) {
|
||||
inname = "-";
|
||||
infile = 0; /* stdin */
|
||||
}
|
||||
else {
|
||||
infile = open(inname, O_RDONLY, 0);
|
||||
if (infile == -1) {
|
||||
fprintf(stderr, "gun cannot open %s\n", inname);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
if (test)
|
||||
outfile = -1;
|
||||
else if (outname == NULL || *outname == 0) {
|
||||
outname = "-";
|
||||
outfile = 1; /* stdout */
|
||||
}
|
||||
else {
|
||||
outfile = open(outname, O_CREAT | O_TRUNC | O_WRONLY, 0666);
|
||||
if (outfile == -1) {
|
||||
close(infile);
|
||||
fprintf(stderr, "gun cannot create %s\n", outname);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
errno = 0;
|
||||
|
||||
/* decompress */
|
||||
ret = gunpipe(strm, infile, outfile);
|
||||
if (outfile > 2) close(outfile);
|
||||
if (infile > 2) close(infile);
|
||||
|
||||
/* interpret result */
|
||||
switch (ret) {
|
||||
case Z_OK:
|
||||
case Z_ERRNO:
|
||||
if (infile > 2 && outfile > 2) {
|
||||
copymeta(inname, outname); /* copy attributes */
|
||||
unlink(inname);
|
||||
}
|
||||
if (ret == Z_ERRNO)
|
||||
fprintf(stderr, "gun warning: trailing garbage ignored in %s\n",
|
||||
inname);
|
||||
break;
|
||||
case Z_DATA_ERROR:
|
||||
if (outfile > 2) unlink(outname);
|
||||
fprintf(stderr, "gun data error on %s: %s\n", inname, strm->msg);
|
||||
break;
|
||||
case Z_MEM_ERROR:
|
||||
if (outfile > 2) unlink(outname);
|
||||
fprintf(stderr, "gun out of memory error--aborting\n");
|
||||
return 1;
|
||||
case Z_BUF_ERROR:
|
||||
if (outfile > 2) unlink(outname);
|
||||
if (strm->next_in != Z_NULL) {
|
||||
fprintf(stderr, "gun write error on %s: %s\n",
|
||||
outname, strerror(errno));
|
||||
}
|
||||
else if (errno) {
|
||||
fprintf(stderr, "gun read error on %s: %s\n",
|
||||
inname, strerror(errno));
|
||||
}
|
||||
else {
|
||||
fprintf(stderr, "gun unexpected end of file on %s\n",
|
||||
inname);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
if (outfile > 2) unlink(outname);
|
||||
fprintf(stderr, "gun internal error--aborting\n");
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Process the gun command line arguments. See the command syntax near the
|
||||
beginning of this source file. */
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int ret, len, test;
|
||||
char *outname;
|
||||
unsigned char *window;
|
||||
z_stream strm;
|
||||
|
||||
/* initialize inflateBack state for repeated use */
|
||||
window = match; /* reuse LZW match buffer */
|
||||
strm.zalloc = Z_NULL;
|
||||
strm.zfree = Z_NULL;
|
||||
strm.opaque = Z_NULL;
|
||||
ret = inflateBackInit(&strm, 15, window);
|
||||
if (ret != Z_OK) {
|
||||
fprintf(stderr, "gun out of memory error--aborting\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* decompress each file to the same name with the suffix removed */
|
||||
argc--;
|
||||
argv++;
|
||||
test = 0;
|
||||
if (argc && strcmp(*argv, "-h") == 0) {
|
||||
fprintf(stderr, "gun 1.6 (17 Jan 2010)\n");
|
||||
fprintf(stderr, "Copyright (C) 2003-2010 Mark Adler\n");
|
||||
fprintf(stderr, "usage: gun [-t] [file1.gz [file2.Z ...]]\n");
|
||||
return 0;
|
||||
}
|
||||
if (argc && strcmp(*argv, "-t") == 0) {
|
||||
test = 1;
|
||||
argc--;
|
||||
argv++;
|
||||
}
|
||||
if (argc)
|
||||
do {
|
||||
if (test)
|
||||
outname = NULL;
|
||||
else {
|
||||
len = (int)strlen(*argv);
|
||||
if (strcmp(*argv + len - 3, ".gz") == 0 ||
|
||||
strcmp(*argv + len - 3, "-gz") == 0)
|
||||
len -= 3;
|
||||
else if (strcmp(*argv + len - 2, ".z") == 0 ||
|
||||
strcmp(*argv + len - 2, "-z") == 0 ||
|
||||
strcmp(*argv + len - 2, "_z") == 0 ||
|
||||
strcmp(*argv + len - 2, ".Z") == 0)
|
||||
len -= 2;
|
||||
else {
|
||||
fprintf(stderr, "gun error: no gz type on %s--skipping\n",
|
||||
*argv);
|
||||
continue;
|
||||
}
|
||||
outname = malloc(len + 1);
|
||||
if (outname == NULL) {
|
||||
fprintf(stderr, "gun out of memory error--aborting\n");
|
||||
ret = 1;
|
||||
break;
|
||||
}
|
||||
memcpy(outname, *argv, len);
|
||||
outname[len] = 0;
|
||||
}
|
||||
ret = gunzip(&strm, *argv, outname, test);
|
||||
if (outname != NULL) free(outname);
|
||||
if (ret) break;
|
||||
} while (argv++, --argc);
|
||||
else
|
||||
ret = gunzip(&strm, NULL, NULL, test);
|
||||
|
||||
/* clean up */
|
||||
inflateBackEnd(&strm);
|
||||
return ret;
|
||||
}
|
|
@ -0,0 +1,504 @@
|
|||
/* gzappend -- command to append to a gzip file
|
||||
|
||||
Copyright (C) 2003, 2012 Mark Adler, all rights reserved
|
||||
version 1.2, 11 Oct 2012
|
||||
|
||||
This software is provided 'as-is', without any express or implied
|
||||
warranty. In no event will the author be held liable for any damages
|
||||
arising from the use of this software.
|
||||
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it
|
||||
freely, subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not
|
||||
claim that you wrote the original software. If you use this software
|
||||
in a product, an acknowledgment in the product documentation would be
|
||||
appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be
|
||||
misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
Mark Adler madler@alumni.caltech.edu
|
||||
*/
|
||||
|
||||
/*
|
||||
* Change history:
|
||||
*
|
||||
* 1.0 19 Oct 2003 - First version
|
||||
* 1.1 4 Nov 2003 - Expand and clarify some comments and notes
|
||||
* - Add version and copyright to help
|
||||
* - Send help to stdout instead of stderr
|
||||
* - Add some preemptive typecasts
|
||||
* - Add L to constants in lseek() calls
|
||||
* - Remove some debugging information in error messages
|
||||
* - Use new data_type definition for zlib 1.2.1
|
||||
* - Simplify and unify file operations
|
||||
* - Finish off gzip file in gztack()
|
||||
* - Use deflatePrime() instead of adding empty blocks
|
||||
* - Keep gzip file clean on appended file read errors
|
||||
* - Use in-place rotate instead of auxiliary buffer
|
||||
* (Why you ask? Because it was fun to write!)
|
||||
* 1.2 11 Oct 2012 - Fix for proper z_const usage
|
||||
* - Check for input buffer malloc failure
|
||||
*/
|
||||
|
||||
/*
|
||||
gzappend takes a gzip file and appends to it, compressing files from the
|
||||
command line or data from stdin. The gzip file is written to directly, to
|
||||
avoid copying that file, in case it's large. Note that this results in the
|
||||
unfriendly behavior that if gzappend fails, the gzip file is corrupted.
|
||||
|
||||
This program was written to illustrate the use of the new Z_BLOCK option of
|
||||
zlib 1.2.x's inflate() function. This option returns from inflate() at each
|
||||
block boundary to facilitate locating and modifying the last block bit at
|
||||
the start of the final deflate block. Also whether using Z_BLOCK or not,
|
||||
another required feature of zlib 1.2.x is that inflate() now provides the
|
||||
number of unused bits in the last input byte used. gzappend will not work
|
||||
with versions of zlib earlier than 1.2.1.
|
||||
|
||||
gzappend first decompresses the gzip file internally, discarding all but
|
||||
the last 32K of uncompressed data, and noting the location of the last block
|
||||
bit and the number of unused bits in the last byte of the compressed data.
|
||||
The gzip trailer containing the CRC-32 and length of the uncompressed data
|
||||
is verified. This trailer will be later overwritten.
|
||||
|
||||
Then the last block bit is cleared by seeking back in the file and rewriting
|
||||
the byte that contains it. Seeking forward, the last byte of the compressed
|
||||
data is saved along with the number of unused bits to initialize deflate.
|
||||
|
||||
A deflate process is initialized, using the last 32K of the uncompressed
|
||||
data from the gzip file to initialize the dictionary. If the total
|
||||
uncompressed data was less than 32K, then all of it is used to initialize
|
||||
the dictionary. The deflate output bit buffer is also initialized with the
|
||||
last bits from the original deflate stream. From here on, the data to
|
||||
append is simply compressed using deflate, and written to the gzip file.
|
||||
When that is complete, the new CRC-32 and uncompressed length are written
|
||||
as the trailer of the gzip file.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
#include "zlib.h"
|
||||
|
||||
#define local static
|
||||
#define LGCHUNK 14
|
||||
#define CHUNK (1U << LGCHUNK)
|
||||
#define DSIZE 32768U
|
||||
|
||||
/* print an error message to stderr and terminate with exit status 1 -- msg1
   and msg2 are written back to back after the "gzappend error: " prefix, so
   the caller supplies any separating space; neither string is modified,
   which is why string literals may be passed */
local void bye(const char *msg1, const char *msg2)
{
    fprintf(stderr, "gzappend error: %s%s\n", msg1, msg2);
    exit(1);
}
|
||||
|
||||
/* greatest common divisor of a and b by a subtractive form of Euclid's
   algorithm: the smaller value is doubled for as long as it still fits into
   the larger one before being subtracted, which keeps the iteration count
   low when the arguments differ greatly -- gcd(x, 0) == gcd(0, x) == x */
local unsigned gcd(unsigned a, unsigned b)
{
    while (a != 0 && b != 0) {
        /* reduce whichever value is larger (b on a tie) */
        unsigned *big = a > b ? &a : &b;
        unsigned sub = a > b ? b : a;

        /* doubling cannot overflow: it only happens while 2*sub <= *big */
        while (*big - sub >= sub)
            sub <<= 1;
        *big -= sub;
    }

    /* one of the two is zero here, so the sum is the survivor */
    return a + b;
}
|
||||
|
||||
/* rotate list[0..len-1] left by rot positions, in place -- rot is reduced
   modulo len first, and lists with fewer than two entries are left alone */
local void rotate(unsigned char *list, unsigned len, unsigned rot)
{
    unsigned char tmp;
    unsigned cycles;
    unsigned char *start, *last, *to, *from;

    /* normalize rot and handle degenerate cases */
    if (len < 2) return;
    if (rot >= len) rot %= len;
    if (rot == 0) return;

    /* pointer to last entry in list */
    last = list + (len - 1);

    /* do simple left shift by one -- source and destination overlap, so this
       must be memmove(); memcpy() on overlapping objects is undefined */
    if (rot == 1) {
        tmp = *list;
        memmove(list, list + 1, len - 1);
        *last = tmp;
        return;
    }

    /* do simple right shift by one */
    if (rot == len - 1) {
        tmp = *last;
        memmove(list + 1, list, len - 1);
        *list = tmp;
        return;
    }

    /* otherwise do rotate as a set of cycles in place */
    cycles = gcd(len, rot);             /* number of cycles */
    do {
        start = from = list + cycles;   /* start index is arbitrary */
        tmp = *from;                    /* save entry to be overwritten */
        for (;;) {
            to = from;                  /* next step in cycle */
            from += rot;                /* go right rot positions */
            if (from > last) from -= len;   /* (pointer better not wrap) */
            if (from == start) break;   /* all but one shifted */
            *to = *from;                /* shift left */
        }
        *to = tmp;                      /* complete the circle */
    } while (--cycles);
}
|
||||
|
||||
/* structure for gzip file read operations */
|
||||
typedef struct {
|
||||
int fd; /* file descriptor */
|
||||
int size; /* 1 << size is bytes in buf */
|
||||
unsigned left; /* bytes available at next */
|
||||
unsigned char *buf; /* buffer */
|
||||
z_const unsigned char *next; /* next byte in buffer */
|
||||
char *name; /* file name for error messages */
|
||||
} file;
|
||||
|
||||
/* reload buffer */
|
||||
local int readin(file *in)
|
||||
{
|
||||
int len;
|
||||
|
||||
len = read(in->fd, in->buf, 1 << in->size);
|
||||
if (len == -1) bye("error reading ", in->name);
|
||||
in->left = (unsigned)len;
|
||||
in->next = in->buf;
|
||||
return len;
|
||||
}
|
||||
|
||||
/* read from file in, exit if end-of-file */
|
||||
local int readmore(file *in)
|
||||
{
|
||||
if (readin(in) == 0) bye("unexpected end of ", in->name);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* get the next byte from in, refilling the buffer first when it is empty
   (readmore() exits at end-of-file) -- the argument is parenthesized so that
   any pointer expression works, but it is evaluated more than once and so
   must not have side effects */
#define read1(in) ((in)->left == 0 ? readmore(in) : 0, \
                   (in)->left--, *((in)->next)++)
|
||||
|
||||
/* skip over n bytes of in */
|
||||
local void skip(file *in, unsigned n)
|
||||
{
|
||||
unsigned bypass;
|
||||
|
||||
if (n > in->left) {
|
||||
n -= in->left;
|
||||
bypass = n & ~((1U << in->size) - 1);
|
||||
if (bypass) {
|
||||
if (lseek(in->fd, (off_t)bypass, SEEK_CUR) == -1)
|
||||
bye("seeking ", in->name);
|
||||
n -= bypass;
|
||||
}
|
||||
readmore(in);
|
||||
if (n > in->left)
|
||||
bye("unexpected end of ", in->name);
|
||||
}
|
||||
in->left -= n;
|
||||
in->next += n;
|
||||
}
|
||||
|
||||
/* read a four-byte unsigned integer, little-endian, from in */
|
||||
unsigned long read4(file *in)
|
||||
{
|
||||
unsigned long val;
|
||||
|
||||
val = read1(in);
|
||||
val += (unsigned)read1(in) << 8;
|
||||
val += (unsigned long)read1(in) << 16;
|
||||
val += (unsigned long)read1(in) << 24;
|
||||
return val;
|
||||
}
|
||||
|
||||
/* skip over gzip header */
|
||||
local void gzheader(file *in)
|
||||
{
|
||||
int flags;
|
||||
unsigned n;
|
||||
|
||||
if (read1(in) != 31 || read1(in) != 139) bye(in->name, " not a gzip file");
|
||||
if (read1(in) != 8) bye("unknown compression method in", in->name);
|
||||
flags = read1(in);
|
||||
if (flags & 0xe0) bye("unknown header flags set in", in->name);
|
||||
skip(in, 6);
|
||||
if (flags & 4) {
|
||||
n = read1(in);
|
||||
n += (unsigned)(read1(in)) << 8;
|
||||
skip(in, n);
|
||||
}
|
||||
if (flags & 8) while (read1(in) != 0) ;
|
||||
if (flags & 16) while (read1(in) != 0) ;
|
||||
if (flags & 2) skip(in, 2);
|
||||
}
|
||||
|
||||
/* decompress gzip file "name", return strm with a deflate stream ready to
|
||||
continue compression of the data in the gzip file, and return a file
|
||||
descriptor pointing to where to write the compressed data -- the deflate
|
||||
stream is initialized to compress using level "level" */
|
||||
local int gzscan(char *name, z_stream *strm, int level)
|
||||
{
|
||||
int ret, lastbit, left, full;
|
||||
unsigned have;
|
||||
unsigned long crc, tot;
|
||||
unsigned char *window;
|
||||
off_t lastoff, end;
|
||||
file gz;
|
||||
|
||||
/* open gzip file */
|
||||
gz.name = name;
|
||||
gz.fd = open(name, O_RDWR, 0);
|
||||
if (gz.fd == -1) bye("cannot open ", name);
|
||||
gz.buf = malloc(CHUNK);
|
||||
if (gz.buf == NULL) bye("out of memory", "");
|
||||
gz.size = LGCHUNK;
|
||||
gz.left = 0;
|
||||
|
||||
/* skip gzip header */
|
||||
gzheader(&gz);
|
||||
|
||||
/* prepare to decompress */
|
||||
window = malloc(DSIZE);
|
||||
if (window == NULL) bye("out of memory", "");
|
||||
strm->zalloc = Z_NULL;
|
||||
strm->zfree = Z_NULL;
|
||||
strm->opaque = Z_NULL;
|
||||
ret = inflateInit2(strm, -15);
|
||||
if (ret != Z_OK) bye("out of memory", " or library mismatch");
|
||||
|
||||
/* decompress the deflate stream, saving append information */
|
||||
lastbit = 0;
|
||||
lastoff = lseek(gz.fd, 0L, SEEK_CUR) - gz.left;
|
||||
left = 0;
|
||||
strm->avail_in = gz.left;
|
||||
strm->next_in = gz.next;
|
||||
crc = crc32(0L, Z_NULL, 0);
|
||||
have = full = 0;
|
||||
do {
|
||||
/* if needed, get more input */
|
||||
if (strm->avail_in == 0) {
|
||||
readmore(&gz);
|
||||
strm->avail_in = gz.left;
|
||||
strm->next_in = gz.next;
|
||||
}
|
||||
|
||||
/* set up output to next available section of sliding window */
|
||||
strm->avail_out = DSIZE - have;
|
||||
strm->next_out = window + have;
|
||||
|
||||
/* inflate and check for errors */
|
||||
ret = inflate(strm, Z_BLOCK);
|
||||
if (ret == Z_STREAM_ERROR) bye("internal stream error!", "");
|
||||
if (ret == Z_MEM_ERROR) bye("out of memory", "");
|
||||
if (ret == Z_DATA_ERROR)
|
||||
bye("invalid compressed data--format violated in", name);
|
||||
|
||||
/* update crc and sliding window pointer */
|
||||
crc = crc32(crc, window + have, DSIZE - have - strm->avail_out);
|
||||
if (strm->avail_out)
|
||||
have = DSIZE - strm->avail_out;
|
||||
else {
|
||||
have = 0;
|
||||
full = 1;
|
||||
}
|
||||
|
||||
/* process end of block */
|
||||
if (strm->data_type & 128) {
|
||||
if (strm->data_type & 64)
|
||||
left = strm->data_type & 0x1f;
|
||||
else {
|
||||
lastbit = strm->data_type & 0x1f;
|
||||
lastoff = lseek(gz.fd, 0L, SEEK_CUR) - strm->avail_in;
|
||||
}
|
||||
}
|
||||
} while (ret != Z_STREAM_END);
|
||||
inflateEnd(strm);
|
||||
gz.left = strm->avail_in;
|
||||
gz.next = strm->next_in;
|
||||
|
||||
/* save the location of the end of the compressed data */
|
||||
end = lseek(gz.fd, 0L, SEEK_CUR) - gz.left;
|
||||
|
||||
/* check gzip trailer and save total for deflate */
|
||||
if (crc != read4(&gz))
|
||||
bye("invalid compressed data--crc mismatch in ", name);
|
||||
tot = strm->total_out;
|
||||
if ((tot & 0xffffffffUL) != read4(&gz))
|
||||
bye("invalid compressed data--length mismatch in", name);
|
||||
|
||||
/* if not at end of file, warn */
|
||||
if (gz.left || readin(&gz))
|
||||
fprintf(stderr,
|
||||
"gzappend warning: junk at end of gzip file overwritten\n");
|
||||
|
||||
/* clear last block bit */
|
||||
lseek(gz.fd, lastoff - (lastbit != 0), SEEK_SET);
|
||||
if (read(gz.fd, gz.buf, 1) != 1) bye("reading after seek on ", name);
|
||||
*gz.buf = (unsigned char)(*gz.buf ^ (1 << ((8 - lastbit) & 7)));
|
||||
lseek(gz.fd, -1L, SEEK_CUR);
|
||||
if (write(gz.fd, gz.buf, 1) != 1) bye("writing after seek to ", name);
|
||||
|
||||
/* if window wrapped, build dictionary from window by rotating */
|
||||
if (full) {
|
||||
rotate(window, DSIZE, have);
|
||||
have = DSIZE;
|
||||
}
|
||||
|
||||
/* set up deflate stream with window, crc, total_in, and leftover bits */
|
||||
ret = deflateInit2(strm, level, Z_DEFLATED, -15, 8, Z_DEFAULT_STRATEGY);
|
||||
if (ret != Z_OK) bye("out of memory", "");
|
||||
deflateSetDictionary(strm, window, have);
|
||||
strm->adler = crc;
|
||||
strm->total_in = tot;
|
||||
if (left) {
|
||||
lseek(gz.fd, --end, SEEK_SET);
|
||||
if (read(gz.fd, gz.buf, 1) != 1) bye("reading after seek on ", name);
|
||||
deflatePrime(strm, 8 - left, *gz.buf);
|
||||
}
|
||||
lseek(gz.fd, end, SEEK_SET);
|
||||
|
||||
/* clean up and return */
|
||||
free(window);
|
||||
free(gz.buf);
|
||||
return gz.fd;
|
||||
}
|
||||
|
||||
/* append file "name" to gzip file gd using deflate stream strm -- if last
|
||||
is true, then finish off the deflate stream at the end */
|
||||
local void gztack(char *name, int gd, z_stream *strm, int last)
|
||||
{
|
||||
int fd, len, ret;
|
||||
unsigned left;
|
||||
unsigned char *in, *out;
|
||||
|
||||
/* open file to compress and append */
|
||||
fd = 0;
|
||||
if (name != NULL) {
|
||||
fd = open(name, O_RDONLY, 0);
|
||||
if (fd == -1)
|
||||
fprintf(stderr, "gzappend warning: %s not found, skipping ...\n",
|
||||
name);
|
||||
}
|
||||
|
||||
/* allocate buffers */
|
||||
in = malloc(CHUNK);
|
||||
out = malloc(CHUNK);
|
||||
if (in == NULL || out == NULL) bye("out of memory", "");
|
||||
|
||||
/* compress input file and append to gzip file */
|
||||
do {
|
||||
/* get more input */
|
||||
len = read(fd, in, CHUNK);
|
||||
if (len == -1) {
|
||||
fprintf(stderr,
|
||||
"gzappend warning: error reading %s, skipping rest ...\n",
|
||||
name);
|
||||
len = 0;
|
||||
}
|
||||
strm->avail_in = (unsigned)len;
|
||||
strm->next_in = in;
|
||||
if (len) strm->adler = crc32(strm->adler, in, (unsigned)len);
|
||||
|
||||
/* compress and write all available output */
|
||||
do {
|
||||
strm->avail_out = CHUNK;
|
||||
strm->next_out = out;
|
||||
ret = deflate(strm, last && len == 0 ? Z_FINISH : Z_NO_FLUSH);
|
||||
left = CHUNK - strm->avail_out;
|
||||
while (left) {
|
||||
len = write(gd, out + CHUNK - strm->avail_out - left, left);
|
||||
if (len == -1) bye("writing gzip file", "");
|
||||
left -= (unsigned)len;
|
||||
}
|
||||
} while (strm->avail_out == 0 && ret != Z_STREAM_END);
|
||||
} while (len != 0);
|
||||
|
||||
/* write trailer after last entry */
|
||||
if (last) {
|
||||
deflateEnd(strm);
|
||||
out[0] = (unsigned char)(strm->adler);
|
||||
out[1] = (unsigned char)(strm->adler >> 8);
|
||||
out[2] = (unsigned char)(strm->adler >> 16);
|
||||
out[3] = (unsigned char)(strm->adler >> 24);
|
||||
out[4] = (unsigned char)(strm->total_in);
|
||||
out[5] = (unsigned char)(strm->total_in >> 8);
|
||||
out[6] = (unsigned char)(strm->total_in >> 16);
|
||||
out[7] = (unsigned char)(strm->total_in >> 24);
|
||||
len = 8;
|
||||
do {
|
||||
ret = write(gd, out + 8 - len, len);
|
||||
if (ret == -1) bye("writing gzip file", "");
|
||||
len -= ret;
|
||||
} while (len);
|
||||
close(gd);
|
||||
}
|
||||
|
||||
/* clean up and return */
|
||||
free(out);
|
||||
free(in);
|
||||
if (fd > 0) close(fd);
|
||||
}
|
||||
|
||||
/* process the compression level option if present, scan the gzip file, and
|
||||
append the specified files, or append the data from stdin if no other file
|
||||
names are provided on the command line -- the gzip file must be writable
|
||||
and seekable */
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int gd, level;
|
||||
z_stream strm;
|
||||
|
||||
/* ignore command name */
|
||||
argc--; argv++;
|
||||
|
||||
/* provide usage if no arguments */
|
||||
if (*argv == NULL) {
|
||||
printf(
|
||||
"gzappend 1.2 (11 Oct 2012) Copyright (C) 2003, 2012 Mark Adler\n"
|
||||
);
|
||||
printf(
|
||||
"usage: gzappend [-level] file.gz [ addthis [ andthis ... ]]\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* set compression level */
|
||||
level = Z_DEFAULT_COMPRESSION;
|
||||
if (argv[0][0] == '-') {
|
||||
if (argv[0][1] < '0' || argv[0][1] > '9' || argv[0][2] != 0)
|
||||
bye("invalid compression level", "");
|
||||
level = argv[0][1] - '0';
|
||||
if (*++argv == NULL) bye("no gzip file name after options", "");
|
||||
}
|
||||
|
||||
/* prepare to append to gzip file */
|
||||
gd = gzscan(*argv++, &strm, level);
|
||||
|
||||
/* append files on command line, or from stdin if none */
|
||||
if (*argv == NULL)
|
||||
gztack(NULL, gd, &strm, 1);
|
||||
else
|
||||
do {
|
||||
gztack(*argv, gd, &strm, argv[1] == NULL);
|
||||
} while (*++argv != NULL);
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,449 @@
|
|||
/* gzjoin -- command to join gzip files into one gzip file
|
||||
|
||||
Copyright (C) 2004, 2005, 2012 Mark Adler, all rights reserved
|
||||
version 1.2, 14 Aug 2012
|
||||
|
||||
This software is provided 'as-is', without any express or implied
|
||||
warranty. In no event will the author be held liable for any damages
|
||||
arising from the use of this software.
|
||||
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it
|
||||
freely, subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not
|
||||
claim that you wrote the original software. If you use this software
|
||||
in a product, an acknowledgment in the product documentation would be
|
||||
appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be
|
||||
misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
Mark Adler madler@alumni.caltech.edu
|
||||
*/
|
||||
|
||||
/*
|
||||
* Change history:
|
||||
*
|
||||
* 1.0 11 Dec 2004 - First version
|
||||
* 1.1 12 Jun 2005 - Changed ssize_t to long for portability
|
||||
* 1.2 14 Aug 2012 - Clean up for z_const usage
|
||||
*/
|
||||
|
||||
/*
|
||||
gzjoin takes one or more gzip files on the command line and writes out a
|
||||
single gzip file that will uncompress to the concatenation of the
|
||||
uncompressed data from the individual gzip files. gzjoin does this without
|
||||
having to recompress any of the data and without having to calculate a new
|
||||
crc32 for the concatenated uncompressed data. gzjoin does however have to
|
||||
decompress all of the input data in order to find the bits in the compressed
|
||||
data that need to be modified to concatenate the streams.
|
||||
|
||||
gzjoin does not do an integrity check on the input gzip files other than
|
||||
checking the gzip header and decompressing the compressed data. They are
|
||||
otherwise assumed to be complete and correct.
|
||||
|
||||
Each joint between gzip files removes at least 18 bytes of previous trailer
|
||||
and subsequent header, and inserts an average of about three bytes to the
|
||||
compressed data in order to connect the streams. The output gzip file
|
||||
has a minimal ten-byte gzip header with no file name or modification time.
|
||||
|
||||
This program was written to illustrate the use of the Z_BLOCK option of
|
||||
inflate() and the crc32_combine() function. gzjoin will not compile with
|
||||
versions of zlib earlier than 1.2.3.
|
||||
*/
|
||||
|
||||
#include <stdio.h> /* fputs(), fprintf(), fwrite(), putc() */
|
||||
#include <stdlib.h> /* exit(), malloc(), free() */
|
||||
#include <fcntl.h> /* open() */
|
||||
#include <unistd.h> /* close(), read(), lseek() */
|
||||
#include "zlib.h"
|
||||
/* crc32(), crc32_combine(), inflateInit2(), inflate(), inflateEnd() */
|
||||
|
||||
#define local static
|
||||
|
||||
/* report an error on stderr and exit with status 1 -- why1 and why2 are
   printed back to back between "gzjoin error: " and ", output incomplete";
   neither string is modified, so string literals may be passed.  The int
   return type (never actually returned) allows use in an expression. */
local int bail(const char *why1, const char *why2)
{
    fprintf(stderr, "gzjoin error: %s%s, output incomplete\n", why1, why2);
    exit(1);
    return 0;
}
|
||||
|
||||
/* -- simple buffered file input with access to the buffer -- */
|
||||
|
||||
#define CHUNK 32768 /* must be a power of two and fit in unsigned */
|
||||
|
||||
/* buffered input file: a descriptor, a CHUNK-byte buffer, and a cursor
   (next, left) over the part of the buffer not yet consumed */
typedef struct {
    char *name;             /* file name, used in error messages */
    int fd;                 /* descriptor the buffer is loaded from */
    unsigned left;          /* unread bytes remaining at next */
    unsigned char *next;    /* next unread byte */
    unsigned char *buf;     /* allocated buffer, CHUNK bytes long */
} bin;
|
||||
|
||||
/* close a buffered file and free allocated memory */
|
||||
local void bclose(bin *in)
|
||||
{
|
||||
if (in != NULL) {
|
||||
if (in->fd != -1)
|
||||
close(in->fd);
|
||||
if (in->buf != NULL)
|
||||
free(in->buf);
|
||||
free(in);
|
||||
}
|
||||
}
|
||||
|
||||
/* open a buffered file for input, return a pointer to type bin, or NULL on
|
||||
failure */
|
||||
local bin *bopen(char *name)
|
||||
{
|
||||
bin *in;
|
||||
|
||||
in = malloc(sizeof(bin));
|
||||
if (in == NULL)
|
||||
return NULL;
|
||||
in->buf = malloc(CHUNK);
|
||||
in->fd = open(name, O_RDONLY, 0);
|
||||
if (in->buf == NULL || in->fd == -1) {
|
||||
bclose(in);
|
||||
return NULL;
|
||||
}
|
||||
in->left = 0;
|
||||
in->next = in->buf;
|
||||
in->name = name;
|
||||
return in;
|
||||
}
|
||||
|
||||
/* load buffer from file, return -1 on read error, 0 or 1 on success, with
|
||||
1 indicating that end-of-file was reached */
|
||||
local int bload(bin *in)
|
||||
{
|
||||
long len;
|
||||
|
||||
if (in == NULL)
|
||||
return -1;
|
||||
if (in->left != 0)
|
||||
return 0;
|
||||
in->next = in->buf;
|
||||
do {
|
||||
len = (long)read(in->fd, in->buf + in->left, CHUNK - in->left);
|
||||
if (len < 0)
|
||||
return -1;
|
||||
in->left += (unsigned)len;
|
||||
} while (len != 0 && in->left < CHUNK);
|
||||
return len == 0 ? 1 : 0;
|
||||
}
|
||||
|
||||
/* get a byte from the file, bail if end of file -- the buffer is reloaded
   first when empty.  The argument is parenthesized so that any pointer
   expression works, but it is evaluated several times and so must not have
   side effects. */
#define bget(in) ((in)->left ? 0 : bload(in), \
                  (in)->left ? ((in)->left--, *((in)->next)++) : \
                    bail("unexpected end of file on ", (in)->name))
|
||||
|
||||
/* get a four-byte little-endian unsigned integer from file */
|
||||
local unsigned long bget4(bin *in)
|
||||
{
|
||||
unsigned long val;
|
||||
|
||||
val = bget(in);
|
||||
val += (unsigned long)(bget(in)) << 8;
|
||||
val += (unsigned long)(bget(in)) << 16;
|
||||
val += (unsigned long)(bget(in)) << 24;
|
||||
return val;
|
||||
}
|
||||
|
||||
/* skip bytes in file */
|
||||
local void bskip(bin *in, unsigned skip)
|
||||
{
|
||||
/* check pointer */
|
||||
if (in == NULL)
|
||||
return;
|
||||
|
||||
/* easy case -- skip bytes in buffer */
|
||||
if (skip <= in->left) {
|
||||
in->left -= skip;
|
||||
in->next += skip;
|
||||
return;
|
||||
}
|
||||
|
||||
/* skip what's in buffer, discard buffer contents */
|
||||
skip -= in->left;
|
||||
in->left = 0;
|
||||
|
||||
/* seek past multiples of CHUNK bytes */
|
||||
if (skip > CHUNK) {
|
||||
unsigned left;
|
||||
|
||||
left = skip & (CHUNK - 1);
|
||||
if (left == 0) {
|
||||
/* exact number of chunks: seek all the way minus one byte to check
|
||||
for end-of-file with a read */
|
||||
lseek(in->fd, skip - 1, SEEK_CUR);
|
||||
if (read(in->fd, in->buf, 1) != 1)
|
||||
bail("unexpected end of file on ", in->name);
|
||||
return;
|
||||
}
|
||||
|
||||
/* skip the integral chunks, update skip with remainder */
|
||||
lseek(in->fd, skip - left, SEEK_CUR);
|
||||
skip = left;
|
||||
}
|
||||
|
||||
/* read more input and skip remainder */
|
||||
bload(in);
|
||||
if (skip > in->left)
|
||||
bail("unexpected end of file on ", in->name);
|
||||
in->left -= skip;
|
||||
in->next += skip;
|
||||
}
|
||||
|
||||
/* -- end of buffered input functions -- */
|
||||
|
||||
/* skip the gzip header from file in */
|
||||
local void gzhead(bin *in)
|
||||
{
|
||||
int flags;
|
||||
|
||||
/* verify gzip magic header and compression method */
|
||||
if (bget(in) != 0x1f || bget(in) != 0x8b || bget(in) != 8)
|
||||
bail(in->name, " is not a valid gzip file");
|
||||
|
||||
/* get and verify flags */
|
||||
flags = bget(in);
|
||||
if ((flags & 0xe0) != 0)
|
||||
bail("unknown reserved bits set in ", in->name);
|
||||
|
||||
/* skip modification time, extra flags, and os */
|
||||
bskip(in, 6);
|
||||
|
||||
/* skip extra field if present */
|
||||
if (flags & 4) {
|
||||
unsigned len;
|
||||
|
||||
len = bget(in);
|
||||
len += (unsigned)(bget(in)) << 8;
|
||||
bskip(in, len);
|
||||
}
|
||||
|
||||
/* skip file name if present */
|
||||
if (flags & 8)
|
||||
while (bget(in) != 0)
|
||||
;
|
||||
|
||||
/* skip comment if present */
|
||||
if (flags & 16)
|
||||
while (bget(in) != 0)
|
||||
;
|
||||
|
||||
/* skip header crc if present */
|
||||
if (flags & 2)
|
||||
bskip(in, 2);
|
||||
}
|
||||
|
||||
/* emit the low 32 bits of val to out as four bytes, least significant byte
   first */
local void put4(unsigned long val, FILE *out)
{
    int n;

    for (n = 0; n < 4; n++) {
        putc((int)(val & 0xff), out);
        val >>= 8;
    }
}
|
||||
|
||||
/* Load up zlib stream from buffered input, bail if end of file */
|
||||
local void zpull(z_streamp strm, bin *in)
|
||||
{
|
||||
if (in->left == 0)
|
||||
bload(in);
|
||||
if (in->left == 0)
|
||||
bail("unexpected end of file on ", in->name);
|
||||
strm->avail_in = in->left;
|
||||
strm->next_in = in->next;
|
||||
}
|
||||
|
||||
/* Write header for gzip file to out and initialize trailer. */
|
||||
local void gzinit(unsigned long *crc, unsigned long *tot, FILE *out)
|
||||
{
|
||||
fwrite("\x1f\x8b\x08\0\0\0\0\0\0\xff", 1, 10, out);
|
||||
*crc = crc32(0L, Z_NULL, 0);
|
||||
*tot = 0;
|
||||
}
|
||||
|
||||
/* Copy the compressed data from name, zeroing the last block bit of the last
|
||||
block if clr is true, and adding empty blocks as needed to get to a byte
|
||||
boundary. If clr is false, then the last block becomes the last block of
|
||||
the output, and the gzip trailer is written. crc and tot maintains the
|
||||
crc and length (modulo 2^32) of the output for the trailer. The resulting
|
||||
gzip file is written to out. gzinit() must be called before the first call
|
||||
of gzcopy() to write the gzip header and to initialize crc and tot. */
|
||||
local void gzcopy(char *name, int clr, unsigned long *crc, unsigned long *tot,
|
||||
FILE *out)
|
||||
{
|
||||
int ret; /* return value from zlib functions */
|
||||
int pos; /* where the "last block" bit is in byte */
|
||||
int last; /* true if processing the last block */
|
||||
bin *in; /* buffered input file */
|
||||
unsigned char *start; /* start of compressed data in buffer */
|
||||
unsigned char *junk; /* buffer for uncompressed data -- discarded */
|
||||
z_off_t len; /* length of uncompressed data (support > 4 GB) */
|
||||
z_stream strm; /* zlib inflate stream */
|
||||
|
||||
/* open gzip file and skip header */
|
||||
in = bopen(name);
|
||||
if (in == NULL)
|
||||
bail("could not open ", name);
|
||||
gzhead(in);
|
||||
|
||||
/* allocate buffer for uncompressed data and initialize raw inflate
|
||||
stream */
|
||||
junk = malloc(CHUNK);
|
||||
strm.zalloc = Z_NULL;
|
||||
strm.zfree = Z_NULL;
|
||||
strm.opaque = Z_NULL;
|
||||
strm.avail_in = 0;
|
||||
strm.next_in = Z_NULL;
|
||||
ret = inflateInit2(&strm, -15);
|
||||
if (junk == NULL || ret != Z_OK)
|
||||
bail("out of memory", "");
|
||||
|
||||
/* inflate and copy compressed data, clear last-block bit if requested */
|
||||
len = 0;
|
||||
zpull(&strm, in);
|
||||
start = in->next;
|
||||
last = start[0] & 1;
|
||||
if (last && clr)
|
||||
start[0] &= ~1;
|
||||
strm.avail_out = 0;
|
||||
for (;;) {
|
||||
/* if input used and output done, write used input and get more */
|
||||
if (strm.avail_in == 0 && strm.avail_out != 0) {
|
||||
fwrite(start, 1, strm.next_in - start, out);
|
||||
start = in->buf;
|
||||
in->left = 0;
|
||||
zpull(&strm, in);
|
||||
}
|
||||
|
||||
/* decompress -- return early when end-of-block reached */
|
||||
strm.avail_out = CHUNK;
|
||||
strm.next_out = junk;
|
||||
ret = inflate(&strm, Z_BLOCK);
|
||||
switch (ret) {
|
||||
case Z_MEM_ERROR:
|
||||
bail("out of memory", "");
|
||||
case Z_DATA_ERROR:
|
||||
bail("invalid compressed data in ", in->name);
|
||||
}
|
||||
|
||||
/* update length of uncompressed data */
|
||||
len += CHUNK - strm.avail_out;
|
||||
|
||||
/* check for block boundary (only get this when block copied out) */
|
||||
if (strm.data_type & 128) {
|
||||
/* if that was the last block, then done */
|
||||
if (last)
|
||||
break;
|
||||
|
||||
/* number of unused bits in last byte */
|
||||
pos = strm.data_type & 7;
|
||||
|
||||
/* find the next last-block bit */
|
||||
if (pos != 0) {
|
||||
/* next last-block bit is in last used byte */
|
||||
pos = 0x100 >> pos;
|
||||
last = strm.next_in[-1] & pos;
|
||||
if (last && clr)
|
||||
in->buf[strm.next_in - in->buf - 1] &= ~pos;
|
||||
}
|
||||
else {
|
||||
/* next last-block bit is in next unused byte */
|
||||
if (strm.avail_in == 0) {
|
||||
/* don't have that byte yet -- get it */
|
||||
fwrite(start, 1, strm.next_in - start, out);
|
||||
start = in->buf;
|
||||
in->left = 0;
|
||||
zpull(&strm, in);
|
||||
}
|
||||
last = strm.next_in[0] & 1;
|
||||
if (last && clr)
|
||||
in->buf[strm.next_in - in->buf] &= ~1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* update buffer with unused input */
|
||||
in->left = strm.avail_in;
|
||||
in->next = in->buf + (strm.next_in - in->buf);
|
||||
|
||||
/* copy used input, write empty blocks to get to byte boundary */
|
||||
pos = strm.data_type & 7;
|
||||
fwrite(start, 1, in->next - start - 1, out);
|
||||
last = in->next[-1];
|
||||
if (pos == 0 || !clr)
|
||||
/* already at byte boundary, or last file: write last byte */
|
||||
putc(last, out);
|
||||
else {
|
||||
/* append empty blocks to last byte */
|
||||
last &= ((0x100 >> pos) - 1); /* assure unused bits are zero */
|
||||
if (pos & 1) {
|
||||
/* odd -- append an empty stored block */
|
||||
putc(last, out);
|
||||
if (pos == 1)
|
||||
putc(0, out); /* two more bits in block header */
|
||||
fwrite("\0\0\xff\xff", 1, 4, out);
|
||||
}
|
||||
else {
|
||||
/* even -- append 1, 2, or 3 empty fixed blocks */
|
||||
switch (pos) {
|
||||
case 6:
|
||||
putc(last | 8, out);
|
||||
last = 0;
|
||||
case 4:
|
||||
putc(last | 0x20, out);
|
||||
last = 0;
|
||||
case 2:
|
||||
putc(last | 0x80, out);
|
||||
putc(0, out);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* update crc and tot */
|
||||
*crc = crc32_combine(*crc, bget4(in), len);
|
||||
*tot += (unsigned long)len;
|
||||
|
||||
/* clean up */
|
||||
inflateEnd(&strm);
|
||||
free(junk);
|
||||
bclose(in);
|
||||
|
||||
/* write trailer if this is the last gzip file */
|
||||
if (!clr) {
|
||||
put4(*crc, out);
|
||||
put4(*tot, out);
|
||||
}
|
||||
}
|
||||
|
||||
/* join the gzip files on the command line, write result to stdout -- with no
   arguments the usage text is printed on stderr.  Every file but the last is
   copied with its last-block bit cleared; the last one ends the stream and
   is followed by the trailer.  A write error on stdout is reported through
   bail() rather than being silently ignored. */
int main(int argc, char **argv)
{
    unsigned long crc, tot;     /* running crc and total uncompressed length */

    /* skip command name */
    argc--;
    argv++;

    /* show usage if no arguments */
    if (argc == 0) {
        fputs("gzjoin usage: gzjoin f1.gz [f2.gz [f3.gz ...]] > fjoin.gz\n",
              stderr);
        return 0;
    }

    /* join gzip files on command line and write to stdout -- argc, already
       decremented, is the number of files still to come, so it is zero
       (clr false) only for the last file */
    gzinit(&crc, &tot, stdout);
    while (argc--)
        gzcopy(*argv++, argc, &crc, &tot, stdout);

    /* none of the individual writes is checked, so make sure everything
       actually reached stdout before reporting success */
    if (fflush(stdout) != 0 || ferror(stdout))
        bail("write error on ", "stdout");

    /* done */
    return 0;
}
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,91 @@
|
|||
/* gzlog.h
|
||||
Copyright (C) 2004, 2008, 2012 Mark Adler, all rights reserved
|
||||
version 2.2, 14 Aug 2012
|
||||
|
||||
This software is provided 'as-is', without any express or implied
|
||||
warranty. In no event will the author be held liable for any damages
|
||||
arising from the use of this software.
|
||||
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it
|
||||
freely, subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not
|
||||
claim that you wrote the original software. If you use this software
|
||||
in a product, an acknowledgment in the product documentation would be
|
||||
appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be
|
||||
misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
Mark Adler madler@alumni.caltech.edu
|
||||
*/
|
||||
|
||||
/* Version History:
|
||||
1.0 26 Nov 2004 First version
|
||||
2.0 25 Apr 2008 Complete redesign for recovery of interrupted operations
|
||||
Interface changed slightly in that now path is a prefix
|
||||
Compression now occurs as needed during gzlog_write()
|
||||
gzlog_write() now always leaves the log file as valid gzip
|
||||
2.1 8 Jul 2012 Fix argument checks in gzlog_compress() and gzlog_write()
|
||||
2.2 14 Aug 2012 Clean up signed comparisons
|
||||
*/
|
||||
|
||||
/*
|
||||
The gzlog object allows writing short messages to a gzipped log file,
|
||||
opening the log file locked for small bursts, and then closing it. The log
|
||||
object works by appending stored (uncompressed) data to the gzip file until
|
||||
1 MB has been accumulated. At that time, the stored data is compressed, and
|
||||
replaces the uncompressed data in the file. The log file is truncated to
|
||||
its new size at that time. After each write operation, the log file is a
|
||||
valid gzip file that can be decompressed to recover what was written.
|
||||
|
||||
The gzlog operations can be interrupted at any point due to an application or
|
||||
system crash, and the log file will be recovered the next time the log is
|
||||
opened with gzlog_open().
|
||||
*/
|
||||
|
||||
#ifndef GZLOG_H
|
||||
#define GZLOG_H
|
||||
|
||||
/* gzlog object type */
|
||||
typedef void gzlog;
|
||||
|
||||
/* Open a gzlog object, creating the log file if it does not exist. Return
|
||||
NULL on error. Note that gzlog_open() could take a while to complete if it
|
||||
has to wait to verify that a lock is stale (possibly for five minutes), or
|
||||
if there is significant contention with other instantiations of this object
|
||||
when locking the resource. path is the prefix of the file names created by
|
||||
this object. If path is "foo", then the log file will be "foo.gz", and
|
||||
other auxiliary files will be created and destroyed during the process:
|
||||
"foo.dict" for a compression dictionary, "foo.temp" for a temporary (next)
|
||||
dictionary, "foo.add" for data being added or compressed, "foo.lock" for the
|
||||
lock file, and "foo.repairs" to log recovery operations performed due to
|
||||
interrupted gzlog operations. A gzlog_open() followed by a gzlog_close()
|
||||
will recover a previously interrupted operation, if any. */
|
||||
gzlog *gzlog_open(char *path);
|
||||
|
||||
/* Write to a gzlog object. Return zero on success, -1 if there is a file i/o
|
||||
error on any of the gzlog files (this should not happen if gzlog_open()
|
||||
succeeded, unless the device has run out of space or leftover auxiliary
|
||||
files have permissions or ownership that prevent their use), -2 if there is
|
||||
a memory allocation failure, or -3 if the log argument is invalid (e.g. if
|
||||
it was not created by gzlog_open()). This function will write data to the
|
||||
file uncompressed, until 1 MB has been accumulated, at which time that data
|
||||
will be compressed. The log file will be a valid gzip file upon successful
|
||||
return. */
|
||||
int gzlog_write(gzlog *log, void *data, size_t len);
|
||||
|
||||
/* Force compression of any uncompressed data in the log. This should be used
|
||||
sparingly, if at all. The main application would be when a log file will
|
||||
not be appended to again. If this is used to compress frequently while
|
||||
appending, it will both significantly increase the execution time and
|
||||
reduce the compression ratio. The return codes are the same as for
|
||||
gzlog_write(). */
|
||||
int gzlog_compress(gzlog *log);
|
||||
|
||||
/* Close a gzlog object. Return zero on success, -3 if the log argument is
|
||||
invalid. The log object is freed, and so cannot be referenced again. */
|
||||
int gzlog_close(gzlog *log);
|
||||
|
||||
#endif
|
|
@ -0,0 +1,545 @@
|
|||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"
|
||||
"http://www.w3.org/TR/REC-html40/loose.dtd">
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
|
||||
<title>zlib Usage Example</title>
|
||||
<!-- Copyright (c) 2004, 2005 Mark Adler. -->
|
||||
</head>
|
||||
<body bgcolor="#FFFFFF" text="#000000" link="#0000FF" vlink="#00A000">
|
||||
<h2 align="center"> zlib Usage Example </h2>
|
||||
We often get questions about how the <tt>deflate()</tt> and <tt>inflate()</tt> functions should be used.
|
||||
Users wonder when they should provide more input, when they should use more output,
|
||||
what to do with a <tt>Z_BUF_ERROR</tt>, how to make sure the process terminates properly, and
|
||||
so on. So for those who have read <tt>zlib.h</tt> (a few times), and
|
||||
would like further edification, below is an annotated example in C of simple routines to compress and decompress
|
||||
from an input file to an output file using <tt>deflate()</tt> and <tt>inflate()</tt> respectively. The
|
||||
annotations are interspersed between lines of the code. So please read between the lines.
|
||||
We hope this helps explain some of the intricacies of <em>zlib</em>.
|
||||
<p>
|
||||
Without further ado, here is the program <a href="zpipe.c"><tt>zpipe.c</tt></a>:
|
||||
<pre><b>
|
||||
/* zpipe.c: example of proper use of zlib's inflate() and deflate()
|
||||
Not copyrighted -- provided to the public domain
|
||||
Version 1.4 11 December 2005 Mark Adler */
|
||||
|
||||
/* Version history:
|
||||
1.0 30 Oct 2004 First version
|
||||
1.1 8 Nov 2004 Add void casting for unused return values
|
||||
Use switch statement for inflate() return values
|
||||
1.2 9 Nov 2004 Add assertions to document zlib guarantees
|
||||
1.3 6 Apr 2005 Remove incorrect assertion in inf()
|
||||
1.4 11 Dec 2005 Add hack to avoid MSDOS end-of-line conversions
|
||||
Avoid some compiler warnings for input and output buffers
|
||||
*/
|
||||
</b></pre><!-- -->
|
||||
We now include the header files for the required definitions. From
|
||||
<tt>stdio.h</tt> we use <tt>fopen()</tt>, <tt>fread()</tt>, <tt>fwrite()</tt>,
|
||||
<tt>feof()</tt>, <tt>ferror()</tt>, and <tt>fclose()</tt> for file i/o, and
|
||||
<tt>fputs()</tt> for error messages. From <tt>string.h</tt> we use
|
||||
<tt>strcmp()</tt> for command line argument processing.
|
||||
From <tt>assert.h</tt> we use the <tt>assert()</tt> macro.
|
||||
From <tt>zlib.h</tt>
|
||||
we use the basic compression functions <tt>deflateInit()</tt>,
|
||||
<tt>deflate()</tt>, and <tt>deflateEnd()</tt>, and the basic decompression
|
||||
functions <tt>inflateInit()</tt>, <tt>inflate()</tt>, and
|
||||
<tt>inflateEnd()</tt>.
|
||||
<pre><b>
|
||||
#include &lt;stdio.h&gt;
|
||||
#include &lt;string.h&gt;
|
||||
#include &lt;assert.h&gt;
|
||||
#include "zlib.h"
|
||||
</b></pre><!-- -->
|
||||
This is an ugly hack required to avoid corruption of the input and output data on
|
||||
Windows/MS-DOS systems. Without this, those systems would assume that the input and output
|
||||
files are text, and try to convert the end-of-line characters from one standard to
|
||||
another. That would corrupt binary data, and in particular would render the compressed data unusable.
|
||||
This sets the input and output to binary which suppresses the end-of-line conversions.
|
||||
<tt>SET_BINARY_MODE()</tt> will be used later on <tt>stdin</tt> and <tt>stdout</tt>, at the beginning of <tt>main()</tt>.
|
||||
<pre><b>
|
||||
#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__)
|
||||
# include &lt;fcntl.h&gt;
|
||||
# include &lt;io.h&gt;
|
||||
# define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY)
|
||||
#else
|
||||
# define SET_BINARY_MODE(file)
|
||||
#endif
|
||||
</b></pre><!-- -->
|
||||
<tt>CHUNK</tt> is simply the buffer size for feeding data to and pulling data
|
||||
from the <em>zlib</em> routines. Larger buffer sizes would be more efficient,
|
||||
especially for <tt>inflate()</tt>. If the memory is available, buffer sizes
|
||||
on the order of 128K or 256K bytes should be used.
|
||||
<pre><b>
|
||||
#define CHUNK 16384
|
||||
</b></pre><!-- -->
|
||||
The <tt>def()</tt> routine compresses data from an input file to an output file. The output data
|
||||
will be in the <em>zlib</em> format, which is different from the <em>gzip</em> or <em>zip</em>
|
||||
formats. The <em>zlib</em> format has a very small header of only two bytes to identify it as
|
||||
a <em>zlib</em> stream and to provide decoding information, and a four-byte trailer with a fast
|
||||
check value to verify the integrity of the uncompressed data after decoding.
|
||||
<pre><b>
|
||||
/* Compress from file source to file dest until EOF on source.
|
||||
def() returns Z_OK on success, Z_MEM_ERROR if memory could not be
|
||||
allocated for processing, Z_STREAM_ERROR if an invalid compression
|
||||
level is supplied, Z_VERSION_ERROR if the version of zlib.h and the
|
||||
version of the library linked do not match, or Z_ERRNO if there is
|
||||
an error reading or writing the files. */
|
||||
int def(FILE *source, FILE *dest, int level)
|
||||
{
|
||||
</b></pre>
|
||||
Here are the local variables for <tt>def()</tt>. <tt>ret</tt> will be used for <em>zlib</em>
|
||||
return codes. <tt>flush</tt> will keep track of the current flushing state for <tt>deflate()</tt>,
|
||||
which is either no flushing, or flush to completion after the end of the input file is reached.
|
||||
<tt>have</tt> is the amount of data returned from <tt>deflate()</tt>. The <tt>strm</tt> structure
|
||||
is used to pass information to and from the <em>zlib</em> routines, and to maintain the
|
||||
<tt>deflate()</tt> state. <tt>in</tt> and <tt>out</tt> are the input and output buffers for
|
||||
<tt>deflate()</tt>.
|
||||
<pre><b>
|
||||
int ret, flush;
|
||||
unsigned have;
|
||||
z_stream strm;
|
||||
unsigned char in[CHUNK];
|
||||
unsigned char out[CHUNK];
|
||||
</b></pre><!-- -->
|
||||
The first thing we do is to initialize the <em>zlib</em> state for compression using
|
||||
<tt>deflateInit()</tt>. This must be done before the first use of <tt>deflate()</tt>.
|
||||
The <tt>zalloc</tt>, <tt>zfree</tt>, and <tt>opaque</tt> fields in the <tt>strm</tt>
|
||||
structure must be initialized before calling <tt>deflateInit()</tt>. Here they are
|
||||
set to the <em>zlib</em> constant <tt>Z_NULL</tt> to request that <em>zlib</em> use
|
||||
the default memory allocation routines. An application may also choose to provide
|
||||
custom memory allocation routines here. <tt>deflateInit()</tt> will allocate on the
|
||||
order of 256K bytes for the internal state.
|
||||
(See <a href="zlib_tech.html"><em>zlib Technical Details</em></a>.)
|
||||
<p>
|
||||
<tt>deflateInit()</tt> is called with a pointer to the structure to be initialized and
|
||||
the compression level, which is an integer in the range of -1 to 9. Lower compression
|
||||
levels result in faster execution, but less compression. Higher levels result in
|
||||
greater compression, but slower execution. The <em>zlib</em> constant Z_DEFAULT_COMPRESSION,
|
||||
equal to -1,
|
||||
provides a good compromise between compression and speed and is equivalent to level 6.
|
||||
Level 0 actually does no compression at all, and in fact expands the data slightly to produce
|
||||
the <em>zlib</em> format (it is not a byte-for-byte copy of the input).
|
||||
More advanced applications of <em>zlib</em>
|
||||
may use <tt>deflateInit2()</tt> here instead. Such an application may want to reduce how
|
||||
much memory will be used, at some price in compression. Or it may need to request a
|
||||
<em>gzip</em> header and trailer instead of a <em>zlib</em> header and trailer, or raw
|
||||
encoding with no header or trailer at all.
|
||||
<p>
|
||||
We must check the return value of <tt>deflateInit()</tt> against the <em>zlib</em> constant
|
||||
<tt>Z_OK</tt> to make sure that it was able to
|
||||
allocate memory for the internal state, and that the provided arguments were valid.
|
||||
<tt>deflateInit()</tt> will also check that the version of <em>zlib</em> that the <tt>zlib.h</tt>
|
||||
file came from matches the version of <em>zlib</em> actually linked with the program. This
|
||||
is especially important for environments in which <em>zlib</em> is a shared library.
|
||||
<p>
|
||||
Note that an application can initialize multiple, independent <em>zlib</em> streams, which can
|
||||
operate in parallel. The state information maintained in the structure allows the <em>zlib</em>
|
||||
routines to be reentrant.
|
||||
<pre><b>
|
||||
/* allocate deflate state */
|
||||
strm.zalloc = Z_NULL;
|
||||
strm.zfree = Z_NULL;
|
||||
strm.opaque = Z_NULL;
|
||||
ret = deflateInit(&strm, level);
|
||||
if (ret != Z_OK)
|
||||
return ret;
|
||||
</b></pre><!-- -->
|
||||
With the pleasantries out of the way, now we can get down to business. The outer <tt>do</tt>-loop
|
||||
reads all of the input file and exits at the bottom of the loop once end-of-file is reached.
|
||||
This loop contains the only call of <tt>deflate()</tt>. So we must make sure that all of the
|
||||
input data has been processed and that all of the output data has been generated and consumed
|
||||
before we fall out of the loop at the bottom.
|
||||
<pre><b>
|
||||
/* compress until end of file */
|
||||
do {
|
||||
</b></pre>
|
||||
We start off by reading data from the input file. The number of bytes read is put directly
|
||||
into <tt>avail_in</tt>, and a pointer to those bytes is put into <tt>next_in</tt>. We also
|
||||
check to see if end-of-file on the input has been reached. If we are at the end of file, then <tt>flush</tt> is set to the
|
||||
<em>zlib</em> constant <tt>Z_FINISH</tt>, which is later passed to <tt>deflate()</tt> to
|
||||
indicate that this is the last chunk of input data to compress. We need to use <tt>feof()</tt>
|
||||
to check for end-of-file as opposed to seeing if fewer than <tt>CHUNK</tt> bytes have been read. The
|
||||
reason is that if the input file length is an exact multiple of <tt>CHUNK</tt>, we will miss
|
||||
the fact that we got to the end-of-file, and not know to tell <tt>deflate()</tt> to finish
|
||||
up the compressed stream. If we are not yet at the end of the input, then the <em>zlib</em>
|
||||
constant <tt>Z_NO_FLUSH</tt> will be passed to <tt>deflate()</tt> to indicate that we are still
|
||||
in the middle of the uncompressed data.
|
||||
<p>
|
||||
If there is an error in reading from the input file, the process is aborted with
|
||||
<tt>deflateEnd()</tt> being called to free the allocated <em>zlib</em> state before returning
|
||||
the error. We wouldn't want a memory leak, now would we? <tt>deflateEnd()</tt> can be called
|
||||
at any time after the state has been initialized. Once that's done, <tt>deflateInit()</tt> (or
|
||||
<tt>deflateInit2()</tt>) would have to be called to start a new compression process. There is
|
||||
no point here in checking the <tt>deflateEnd()</tt> return code. The deallocation can't fail.
|
||||
<pre><b>
|
||||
strm.avail_in = fread(in, 1, CHUNK, source);
|
||||
if (ferror(source)) {
|
||||
(void)deflateEnd(&strm);
|
||||
return Z_ERRNO;
|
||||
}
|
||||
flush = feof(source) ? Z_FINISH : Z_NO_FLUSH;
|
||||
strm.next_in = in;
|
||||
</b></pre><!-- -->
|
||||
The inner <tt>do</tt>-loop passes our chunk of input data to <tt>deflate()</tt>, and then
|
||||
keeps calling <tt>deflate()</tt> until it is done producing output. Once there is no more
|
||||
new output, <tt>deflate()</tt> is guaranteed to have consumed all of the input, i.e.,
|
||||
<tt>avail_in</tt> will be zero.
|
||||
<pre><b>
|
||||
/* run deflate() on input until output buffer not full, finish
|
||||
compression if all of source has been read in */
|
||||
do {
|
||||
</b></pre>
|
||||
Output space is provided to <tt>deflate()</tt> by setting <tt>avail_out</tt> to the number
|
||||
of available output bytes and <tt>next_out</tt> to a pointer to that space.
|
||||
<pre><b>
|
||||
strm.avail_out = CHUNK;
|
||||
strm.next_out = out;
|
||||
</b></pre>
|
||||
Now we call the compression engine itself, <tt>deflate()</tt>. It takes as many of the
|
||||
<tt>avail_in</tt> bytes at <tt>next_in</tt> as it can process, and writes as many as
|
||||
<tt>avail_out</tt> bytes to <tt>next_out</tt>. Those counters and pointers are then
|
||||
updated past the input data consumed and the output data written. It is the amount of
|
||||
output space available that may limit how much input is consumed.
|
||||
Hence the inner loop to make sure that
|
||||
all of the input is consumed by providing more output space each time. Since <tt>avail_in</tt>
|
||||
and <tt>next_in</tt> are updated by <tt>deflate()</tt>, we don't have to mess with those
|
||||
between <tt>deflate()</tt> calls until it's all used up.
|
||||
<p>
|
||||
The parameters to <tt>deflate()</tt> are a pointer to the <tt>strm</tt> structure containing
|
||||
the input and output information and the internal compression engine state, and a parameter
|
||||
indicating whether and how to flush data to the output. Normally <tt>deflate()</tt> will consume
|
||||
several K bytes of input data before producing any output (except for the header), in order
|
||||
to accumulate statistics on the data for optimum compression. It will then put out a burst of
|
||||
compressed data, and proceed to consume more input before the next burst. Eventually,
|
||||
<tt>deflate()</tt>
|
||||
must be told to terminate the stream, complete the compression with provided input data, and
|
||||
write out the trailer check value. <tt>deflate()</tt> will continue to compress normally as long
|
||||
as the flush parameter is <tt>Z_NO_FLUSH</tt>. Once the <tt>Z_FINISH</tt> parameter is provided,
|
||||
<tt>deflate()</tt> will begin to complete the compressed output stream. However depending on how
|
||||
much output space is provided, <tt>deflate()</tt> may have to be called several times until it
|
||||
has provided the complete compressed stream, even after it has consumed all of the input. The flush
|
||||
parameter must continue to be <tt>Z_FINISH</tt> for those subsequent calls.
|
||||
<p>
|
||||
There are other values of the flush parameter that are used in more advanced applications. You can
|
||||
force <tt>deflate()</tt> to produce a burst of output that encodes all of the input data provided
|
||||
so far, even if it wouldn't have otherwise, for example to control data latency on a link with
|
||||
compressed data. You can also ask that <tt>deflate()</tt> do that as well as erase any history up to
|
||||
that point so that what follows can be decompressed independently, for example for random access
|
||||
applications. Both requests will degrade compression by an amount depending on how often such
|
||||
requests are made.
|
||||
<p>
|
||||
<tt>deflate()</tt> has a return value that can indicate errors, yet we do not check it here. Why
|
||||
not? Well, it turns out that <tt>deflate()</tt> can do no wrong here. Let's go through
|
||||
<tt>deflate()</tt>'s return values and dispense with them one by one. The possible values are
|
||||
<tt>Z_OK</tt>, <tt>Z_STREAM_END</tt>, <tt>Z_STREAM_ERROR</tt>, or <tt>Z_BUF_ERROR</tt>. <tt>Z_OK</tt>
|
||||
is, well, ok. <tt>Z_STREAM_END</tt> is also ok and will be returned for the last call of
|
||||
<tt>deflate()</tt>. This is already guaranteed by calling <tt>deflate()</tt> with <tt>Z_FINISH</tt>
|
||||
until it has no more output. <tt>Z_STREAM_ERROR</tt> is only possible if the stream is not
|
||||
initialized properly, but we did initialize it properly. There is no harm in checking for
|
||||
<tt>Z_STREAM_ERROR</tt> here, for example to check for the possibility that some
|
||||
other part of the application inadvertently clobbered the memory containing the <em>zlib</em> state.
|
||||
<tt>Z_BUF_ERROR</tt> will be explained further below, but
|
||||
suffice it to say that this is simply an indication that <tt>deflate()</tt> could not consume
|
||||
more input or produce more output. <tt>deflate()</tt> can be called again with more output space
|
||||
or more available input, which it will be in this code.
|
||||
<pre><b>
|
||||
ret = deflate(&strm, flush); /* no bad return value */
|
||||
assert(ret != Z_STREAM_ERROR); /* state not clobbered */
|
||||
</b></pre>
|
||||
Now we compute how much output <tt>deflate()</tt> provided on the last call, which is the
|
||||
difference between how much space was provided before the call, and how much output space
|
||||
is still available after the call. Then that data, if any, is written to the output file.
|
||||
We can then reuse the output buffer for the next call of <tt>deflate()</tt>. Again if there
|
||||
is a file i/o error, we call <tt>deflateEnd()</tt> before returning to avoid a memory leak.
|
||||
<pre><b>
|
||||
have = CHUNK - strm.avail_out;
|
||||
if (fwrite(out, 1, have, dest) != have || ferror(dest)) {
|
||||
(void)deflateEnd(&strm);
|
||||
return Z_ERRNO;
|
||||
}
|
||||
</b></pre>
|
||||
The inner <tt>do</tt>-loop is repeated until the last <tt>deflate()</tt> call fails to fill the
|
||||
provided output buffer. Then we know that <tt>deflate()</tt> has done as much as it can with
|
||||
the provided input, and that all of that input has been consumed. We can then fall out of this
|
||||
loop and reuse the input buffer.
|
||||
<p>
|
||||
The way we tell that <tt>deflate()</tt> has no more output is by seeing that it did not fill
|
||||
the output buffer, leaving <tt>avail_out</tt> greater than zero. However suppose that
|
||||
<tt>deflate()</tt> has no more output, but just so happened to exactly fill the output buffer!
|
||||
<tt>avail_out</tt> is zero, and we can't tell that <tt>deflate()</tt> has done all it can.
|
||||
As far as we know, <tt>deflate()</tt>
|
||||
has more output for us. So we call it again. But now <tt>deflate()</tt> produces no output
|
||||
at all, and <tt>avail_out</tt> remains unchanged as <tt>CHUNK</tt>. That <tt>deflate()</tt> call
|
||||
wasn't able to do anything, either consume input or produce output, and so it returns
|
||||
<tt>Z_BUF_ERROR</tt>. (See, I told you I'd cover this later.) However this is not a problem at
|
||||
all. Now we finally have the desired indication that <tt>deflate()</tt> is really done,
|
||||
and so we drop out of the inner loop to provide more input to <tt>deflate()</tt>.
|
||||
<p>
|
||||
With <tt>flush</tt> set to <tt>Z_FINISH</tt>, this final set of <tt>deflate()</tt> calls will
|
||||
complete the output stream. Once that is done, subsequent calls of <tt>deflate()</tt> would return
|
||||
<tt>Z_STREAM_ERROR</tt> if the flush parameter is not <tt>Z_FINISH</tt>, and do no more processing
|
||||
until the state is reinitialized.
|
||||
<p>
|
||||
Some applications of <em>zlib</em> have two loops that call <tt>deflate()</tt>
|
||||
instead of the single inner loop we have here. The first loop would call
|
||||
without flushing and feed all of the data to <tt>deflate()</tt>. The second loop would call
|
||||
<tt>deflate()</tt> with no more
|
||||
data and the <tt>Z_FINISH</tt> parameter to complete the process. As you can see from this
|
||||
example, that can be avoided by simply keeping track of the current flush state.
|
||||
<pre><b>
|
||||
} while (strm.avail_out == 0);
|
||||
assert(strm.avail_in == 0); /* all input will be used */
|
||||
</b></pre><!-- -->
|
||||
Now we check to see if we have already processed all of the input file. That information was
|
||||
saved in the <tt>flush</tt> variable, so we see if that was set to <tt>Z_FINISH</tt>. If so,
|
||||
then we're done and we fall out of the outer loop. We're guaranteed to get <tt>Z_STREAM_END</tt>
|
||||
from the last <tt>deflate()</tt> call, since we ran it until the last chunk of input was
|
||||
consumed and all of the output was generated.
|
||||
<pre><b>
|
||||
/* done when last data in file processed */
|
||||
} while (flush != Z_FINISH);
|
||||
assert(ret == Z_STREAM_END); /* stream will be complete */
|
||||
</b></pre><!-- -->
|
||||
The process is complete, but we still need to deallocate the state to avoid a memory leak
|
||||
(or rather more like a memory hemorrhage if you didn't do this). Then
|
||||
finally we can return with a happy return value.
|
||||
<pre><b>
|
||||
/* clean up and return */
|
||||
(void)deflateEnd(&strm);
|
||||
return Z_OK;
|
||||
}
|
||||
</b></pre><!-- -->
|
||||
Now we do the same thing for decompression in the <tt>inf()</tt> routine. <tt>inf()</tt>
|
||||
decompresses what is hopefully a valid <em>zlib</em> stream from the input file and writes the
|
||||
uncompressed data to the output file. Much of the discussion above for <tt>def()</tt>
|
||||
applies to <tt>inf()</tt> as well, so the discussion here will focus on the differences between
|
||||
the two.
|
||||
<pre><b>
|
||||
/* Decompress from file source to file dest until stream ends or EOF.
|
||||
inf() returns Z_OK on success, Z_MEM_ERROR if memory could not be
|
||||
allocated for processing, Z_DATA_ERROR if the deflate data is
|
||||
invalid or incomplete, Z_VERSION_ERROR if the version of zlib.h and
|
||||
the version of the library linked do not match, or Z_ERRNO if there
|
||||
is an error reading or writing the files. */
|
||||
int inf(FILE *source, FILE *dest)
|
||||
{
|
||||
</b></pre>
|
||||
The local variables have the same functionality as they do for <tt>def()</tt>. The
|
||||
only difference is that there is no <tt>flush</tt> variable, since <tt>inflate()</tt>
|
||||
can tell from the <em>zlib</em> stream itself when the stream is complete.
|
||||
<pre><b>
|
||||
int ret;
|
||||
unsigned have;
|
||||
z_stream strm;
|
||||
unsigned char in[CHUNK];
|
||||
unsigned char out[CHUNK];
|
||||
</b></pre><!-- -->
|
||||
The initialization of the state is the same, except that there is no compression level,
|
||||
of course, and two more elements of the structure are initialized. <tt>avail_in</tt>
|
||||
and <tt>next_in</tt> must be initialized before calling <tt>inflateInit()</tt>. This
|
||||
is because the application has the option to provide the start of the zlib stream in
|
||||
order for <tt>inflateInit()</tt> to have access to information about the compression
|
||||
method to aid in memory allocation. In the current implementation of <em>zlib</em>
|
||||
(up through versions 1.2.x), the method-dependent memory allocations are deferred to the first call of
|
||||
<tt>inflate()</tt> anyway. However those fields must be initialized since later versions
|
||||
of <em>zlib</em> that provide more compression methods may take advantage of this interface.
|
||||
In any case, no decompression is performed by <tt>inflateInit()</tt>, so the
|
||||
<tt>avail_out</tt> and <tt>next_out</tt> fields do not need to be initialized before calling.
|
||||
<p>
|
||||
Here <tt>avail_in</tt> is set to zero and <tt>next_in</tt> is set to <tt>Z_NULL</tt> to
|
||||
indicate that no input data is being provided.
|
||||
<pre><b>
|
||||
/* allocate inflate state */
|
||||
strm.zalloc = Z_NULL;
|
||||
strm.zfree = Z_NULL;
|
||||
strm.opaque = Z_NULL;
|
||||
strm.avail_in = 0;
|
||||
strm.next_in = Z_NULL;
|
||||
ret = inflateInit(&strm);
|
||||
if (ret != Z_OK)
|
||||
return ret;
|
||||
</b></pre><!-- -->
|
||||
The outer <tt>do</tt>-loop decompresses input until <tt>inflate()</tt> indicates
|
||||
that it has reached the end of the compressed data and has produced all of the uncompressed
|
||||
output. This is in contrast to <tt>def()</tt> which processes all of the input file.
|
||||
If end-of-file is reached before the compressed data self-terminates, then the compressed
|
||||
data is incomplete and an error is returned.
|
||||
<pre><b>
|
||||
/* decompress until deflate stream ends or end of file */
|
||||
do {
|
||||
</b></pre>
|
||||
We read input data and set the <tt>strm</tt> structure accordingly. If we've reached the
|
||||
end of the input file, then we leave the outer loop and report an error, since the
|
||||
compressed data is incomplete. Note that we may read more data than is eventually consumed
|
||||
by <tt>inflate()</tt>, if the input file continues past the <em>zlib</em> stream.
|
||||
For applications where <em>zlib</em> streams are embedded in other data, this routine would
|
||||
need to be modified to return the unused data, or at least indicate how much of the input
|
||||
data was not used, so the application would know where to pick up after the <em>zlib</em> stream.
|
||||
<pre><b>
|
||||
strm.avail_in = fread(in, 1, CHUNK, source);
|
||||
if (ferror(source)) {
|
||||
(void)inflateEnd(&strm);
|
||||
return Z_ERRNO;
|
||||
}
|
||||
if (strm.avail_in == 0)
|
||||
break;
|
||||
strm.next_in = in;
|
||||
</b></pre><!-- -->
|
||||
The inner <tt>do</tt>-loop has the same function it did in <tt>def()</tt>, which is to
|
||||
keep calling <tt>inflate()</tt> until it has generated all of the output it can with the
|
||||
provided input.
|
||||
<pre><b>
|
||||
/* run inflate() on input until output buffer not full */
|
||||
do {
|
||||
</b></pre>
|
||||
Just like in <tt>def()</tt>, the same output space is provided for each call of <tt>inflate()</tt>.
|
||||
<pre><b>
|
||||
strm.avail_out = CHUNK;
|
||||
strm.next_out = out;
|
||||
</b></pre>
|
||||
Now we run the decompression engine itself. There is no need to adjust the flush parameter, since
|
||||
the <em>zlib</em> format is self-terminating. The main difference here is that there are
|
||||
return values that we need to pay attention to. <tt>Z_DATA_ERROR</tt>
|
||||
indicates that <tt>inflate()</tt> detected an error in the <em>zlib</em> compressed data format,
|
||||
which means that either the data is not a <em>zlib</em> stream to begin with, or that the data was
|
||||
corrupted somewhere along the way since it was compressed. The other error to be processed is
|
||||
<tt>Z_MEM_ERROR</tt>, which can occur since memory allocation is deferred until <tt>inflate()</tt>
|
||||
needs it, unlike <tt>deflate()</tt>, whose memory is allocated at the start by <tt>deflateInit()</tt>.
|
||||
<p>
|
||||
Advanced applications may use
|
||||
<tt>deflateSetDictionary()</tt> to prime <tt>deflate()</tt> with a set of likely data to improve the
|
||||
first 32K or so of compression. This is noted in the <em>zlib</em> header, so <tt>inflate()</tt>
|
||||
requests that that dictionary be provided before it can start to decompress. Without the dictionary,
|
||||
correct decompression is not possible. For this routine, we have no idea what the dictionary is,
|
||||
so the <tt>Z_NEED_DICT</tt> indication is converted to a <tt>Z_DATA_ERROR</tt>.
|
||||
<p>
|
||||
<tt>inflate()</tt> can also return <tt>Z_STREAM_ERROR</tt>, which should not be possible here,
|
||||
but could be checked for as noted above for <tt>def()</tt>. <tt>Z_BUF_ERROR</tt> does not need to be
|
||||
checked for here, for the same reasons noted for <tt>def()</tt>. <tt>Z_STREAM_END</tt> will be
|
||||
checked for later.
|
||||
<pre><b>
|
||||
ret = inflate(&strm, Z_NO_FLUSH);
|
||||
assert(ret != Z_STREAM_ERROR); /* state not clobbered */
|
||||
switch (ret) {
|
||||
case Z_NEED_DICT:
|
||||
ret = Z_DATA_ERROR; /* and fall through */
|
||||
case Z_DATA_ERROR:
|
||||
case Z_MEM_ERROR:
|
||||
(void)inflateEnd(&strm);
|
||||
return ret;
|
||||
}
|
||||
</b></pre>
|
||||
The output of <tt>inflate()</tt> is handled identically to that of <tt>deflate()</tt>.
|
||||
<pre><b>
|
||||
have = CHUNK - strm.avail_out;
|
||||
if (fwrite(out, 1, have, dest) != have || ferror(dest)) {
|
||||
(void)inflateEnd(&strm);
|
||||
return Z_ERRNO;
|
||||
}
|
||||
</b></pre>
|
||||
The inner <tt>do</tt>-loop ends when <tt>inflate()</tt> has no more output as indicated
|
||||
by not filling the output buffer, just as for <tt>deflate()</tt>. In this case, we cannot
|
||||
assert that <tt>strm.avail_in</tt> will be zero, since the deflate stream may end before the file
|
||||
does.
|
||||
<pre><b>
|
||||
} while (strm.avail_out == 0);
|
||||
</b></pre><!-- -->
|
||||
The outer <tt>do</tt>-loop ends when <tt>inflate()</tt> reports that it has reached the
|
||||
end of the input <em>zlib</em> stream, has completed the decompression and integrity
|
||||
check, and has provided all of the output. This is indicated by the <tt>inflate()</tt>
|
||||
return value <tt>Z_STREAM_END</tt>. The inner loop is guaranteed to leave <tt>ret</tt>
|
||||
equal to <tt>Z_STREAM_END</tt> if the last chunk of the input file read contained the end
|
||||
of the <em>zlib</em> stream. So if the return value is not <tt>Z_STREAM_END</tt>, the
|
||||
loop continues to read more input.
|
||||
<pre><b>
|
||||
/* done when inflate() says it's done */
|
||||
} while (ret != Z_STREAM_END);
|
||||
</b></pre><!-- -->
|
||||
At this point, decompression successfully completed, or we broke out of the loop due to no
|
||||
more data being available from the input file. If the last <tt>inflate()</tt> return value
|
||||
is not <tt>Z_STREAM_END</tt>, then the <em>zlib</em> stream was incomplete and a data error
|
||||
is returned. Otherwise, we return with a happy return value. Of course, <tt>inflateEnd()</tt>
|
||||
is called first to avoid a memory leak.
|
||||
<pre><b>
|
||||
/* clean up and return */
|
||||
(void)inflateEnd(&strm);
|
||||
return ret == Z_STREAM_END ? Z_OK : Z_DATA_ERROR;
|
||||
}
|
||||
</b></pre><!-- -->
|
||||
That ends the routines that directly use <em>zlib</em>. The following routines make this
|
||||
a command-line program by running data through the above routines from <tt>stdin</tt> to
|
||||
<tt>stdout</tt>, and handling any errors reported by <tt>def()</tt> or <tt>inf()</tt>.
|
||||
<p>
|
||||
<tt>zerr()</tt> is used to interpret the possible error codes from <tt>def()</tt>
|
||||
and <tt>inf()</tt>, as detailed in their comments above, and print out an error message.
|
||||
Note that these are only a subset of the possible return values from <tt>deflate()</tt>
|
||||
and <tt>inflate()</tt>.
|
||||
<pre><b>
|
||||
/* report a zlib or i/o error */
|
||||
void zerr(int ret)
|
||||
{
|
||||
fputs("zpipe: ", stderr);
|
||||
switch (ret) {
|
||||
case Z_ERRNO:
|
||||
if (ferror(stdin))
|
||||
fputs("error reading stdin\n", stderr);
|
||||
if (ferror(stdout))
|
||||
fputs("error writing stdout\n", stderr);
|
||||
break;
|
||||
case Z_STREAM_ERROR:
|
||||
fputs("invalid compression level\n", stderr);
|
||||
break;
|
||||
case Z_DATA_ERROR:
|
||||
fputs("invalid or incomplete deflate data\n", stderr);
|
||||
break;
|
||||
case Z_MEM_ERROR:
|
||||
fputs("out of memory\n", stderr);
|
||||
break;
|
||||
case Z_VERSION_ERROR:
|
||||
fputs("zlib version mismatch!\n", stderr);
|
||||
}
|
||||
}
|
||||
</b></pre><!-- -->
|
||||
Here is the <tt>main()</tt> routine used to test <tt>def()</tt> and <tt>inf()</tt>. The
|
||||
<tt>zpipe</tt> command is simply a compression pipe from <tt>stdin</tt> to <tt>stdout</tt>, if
|
||||
no arguments are given, or it is a decompression pipe if <tt>zpipe -d</tt> is used. If any other
|
||||
arguments are provided, no compression or decompression is performed. Instead a usage
|
||||
message is displayed. Examples are <tt>zpipe < foo.txt > foo.txt.z</tt> to compress, and
|
||||
<tt>zpipe -d < foo.txt.z > foo.txt</tt> to decompress.
|
||||
<pre><b>
|
||||
/* compress or decompress from stdin to stdout */
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int ret;
|
||||
|
||||
/* avoid end-of-line conversions */
|
||||
SET_BINARY_MODE(stdin);
|
||||
SET_BINARY_MODE(stdout);
|
||||
|
||||
/* do compression if no arguments */
|
||||
if (argc == 1) {
|
||||
ret = def(stdin, stdout, Z_DEFAULT_COMPRESSION);
|
||||
if (ret != Z_OK)
|
||||
zerr(ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* do decompression if -d specified */
|
||||
else if (argc == 2 && strcmp(argv[1], "-d") == 0) {
|
||||
ret = inf(stdin, stdout);
|
||||
if (ret != Z_OK)
|
||||
zerr(ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* otherwise, report usage */
|
||||
else {
|
||||
fputs("zpipe usage: zpipe [-d] < source > dest\n", stderr);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
</b></pre>
|
||||
<hr>
|
||||
<i>Copyright (c) 2004, 2005 by Mark Adler<br>Last modified 11 December 2005</i>
|
||||
</body>
|
||||
</html>
|
|
@ -0,0 +1,205 @@
|
|||
/* zpipe.c: example of proper use of zlib's inflate() and deflate()
|
||||
Not copyrighted -- provided to the public domain
|
||||
Version 1.4 11 December 2005 Mark Adler */
|
||||
|
||||
/* Version history:
|
||||
1.0 30 Oct 2004 First version
|
||||
1.1 8 Nov 2004 Add void casting for unused return values
|
||||
Use switch statement for inflate() return values
|
||||
1.2 9 Nov 2004 Add assertions to document zlib guarantees
|
||||
1.3 6 Apr 2005 Remove incorrect assertion in inf()
|
||||
1.4 11 Dec 2005 Add hack to avoid MSDOS end-of-line conversions
|
||||
Avoid some compiler warnings for input and output buffers
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include "zlib.h"
|
||||
|
||||
#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__)
|
||||
# include <fcntl.h>
|
||||
# include <io.h>
|
||||
# define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY)
|
||||
#else
|
||||
# define SET_BINARY_MODE(file)
|
||||
#endif
|
||||
|
||||
#define CHUNK 16384
|
||||
|
||||
/* Compress from file source to file dest until EOF on source.
|
||||
def() returns Z_OK on success, Z_MEM_ERROR if memory could not be
|
||||
allocated for processing, Z_STREAM_ERROR if an invalid compression
|
||||
level is supplied, Z_VERSION_ERROR if the version of zlib.h and the
|
||||
version of the library linked do not match, or Z_ERRNO if there is
|
||||
an error reading or writing the files. */
|
||||
int def(FILE *source, FILE *dest, int level)
|
||||
{
|
||||
int ret, flush;
|
||||
unsigned have;
|
||||
z_stream strm;
|
||||
unsigned char in[CHUNK];
|
||||
unsigned char out[CHUNK];
|
||||
|
||||
/* allocate deflate state */
|
||||
strm.zalloc = Z_NULL;
|
||||
strm.zfree = Z_NULL;
|
||||
strm.opaque = Z_NULL;
|
||||
ret = deflateInit(&strm, level);
|
||||
if (ret != Z_OK)
|
||||
return ret;
|
||||
|
||||
/* compress until end of file */
|
||||
do {
|
||||
strm.avail_in = fread(in, 1, CHUNK, source);
|
||||
if (ferror(source)) {
|
||||
(void)deflateEnd(&strm);
|
||||
return Z_ERRNO;
|
||||
}
|
||||
flush = feof(source) ? Z_FINISH : Z_NO_FLUSH;
|
||||
strm.next_in = in;
|
||||
|
||||
/* run deflate() on input until output buffer not full, finish
|
||||
compression if all of source has been read in */
|
||||
do {
|
||||
strm.avail_out = CHUNK;
|
||||
strm.next_out = out;
|
||||
ret = deflate(&strm, flush); /* no bad return value */
|
||||
assert(ret != Z_STREAM_ERROR); /* state not clobbered */
|
||||
have = CHUNK - strm.avail_out;
|
||||
if (fwrite(out, 1, have, dest) != have || ferror(dest)) {
|
||||
(void)deflateEnd(&strm);
|
||||
return Z_ERRNO;
|
||||
}
|
||||
} while (strm.avail_out == 0);
|
||||
assert(strm.avail_in == 0); /* all input will be used */
|
||||
|
||||
/* done when last data in file processed */
|
||||
} while (flush != Z_FINISH);
|
||||
assert(ret == Z_STREAM_END); /* stream will be complete */
|
||||
|
||||
/* clean up and return */
|
||||
(void)deflateEnd(&strm);
|
||||
return Z_OK;
|
||||
}
|
||||
|
||||
/* Decompress from file source to file dest until stream ends or EOF.
|
||||
inf() returns Z_OK on success, Z_MEM_ERROR if memory could not be
|
||||
allocated for processing, Z_DATA_ERROR if the deflate data is
|
||||
invalid or incomplete, Z_VERSION_ERROR if the version of zlib.h and
|
||||
the version of the library linked do not match, or Z_ERRNO if there
|
||||
is an error reading or writing the files. */
|
||||
int inf(FILE *source, FILE *dest)
|
||||
{
|
||||
int ret;
|
||||
unsigned have;
|
||||
z_stream strm;
|
||||
unsigned char in[CHUNK];
|
||||
unsigned char out[CHUNK];
|
||||
|
||||
/* allocate inflate state */
|
||||
strm.zalloc = Z_NULL;
|
||||
strm.zfree = Z_NULL;
|
||||
strm.opaque = Z_NULL;
|
||||
strm.avail_in = 0;
|
||||
strm.next_in = Z_NULL;
|
||||
ret = inflateInit(&strm);
|
||||
if (ret != Z_OK)
|
||||
return ret;
|
||||
|
||||
/* decompress until deflate stream ends or end of file */
|
||||
do {
|
||||
strm.avail_in = fread(in, 1, CHUNK, source);
|
||||
if (ferror(source)) {
|
||||
(void)inflateEnd(&strm);
|
||||
return Z_ERRNO;
|
||||
}
|
||||
if (strm.avail_in == 0)
|
||||
break;
|
||||
strm.next_in = in;
|
||||
|
||||
/* run inflate() on input until output buffer not full */
|
||||
do {
|
||||
strm.avail_out = CHUNK;
|
||||
strm.next_out = out;
|
||||
ret = inflate(&strm, Z_NO_FLUSH);
|
||||
assert(ret != Z_STREAM_ERROR); /* state not clobbered */
|
||||
switch (ret) {
|
||||
case Z_NEED_DICT:
|
||||
ret = Z_DATA_ERROR; /* and fall through */
|
||||
case Z_DATA_ERROR:
|
||||
case Z_MEM_ERROR:
|
||||
(void)inflateEnd(&strm);
|
||||
return ret;
|
||||
}
|
||||
have = CHUNK - strm.avail_out;
|
||||
if (fwrite(out, 1, have, dest) != have || ferror(dest)) {
|
||||
(void)inflateEnd(&strm);
|
||||
return Z_ERRNO;
|
||||
}
|
||||
} while (strm.avail_out == 0);
|
||||
|
||||
/* done when inflate() says it's done */
|
||||
} while (ret != Z_STREAM_END);
|
||||
|
||||
/* clean up and return */
|
||||
(void)inflateEnd(&strm);
|
||||
return ret == Z_STREAM_END ? Z_OK : Z_DATA_ERROR;
|
||||
}
|
||||
|
||||
/* report a zlib or i/o error */
|
||||
void zerr(int ret)
|
||||
{
|
||||
fputs("zpipe: ", stderr);
|
||||
switch (ret) {
|
||||
case Z_ERRNO:
|
||||
if (ferror(stdin))
|
||||
fputs("error reading stdin\n", stderr);
|
||||
if (ferror(stdout))
|
||||
fputs("error writing stdout\n", stderr);
|
||||
break;
|
||||
case Z_STREAM_ERROR:
|
||||
fputs("invalid compression level\n", stderr);
|
||||
break;
|
||||
case Z_DATA_ERROR:
|
||||
fputs("invalid or incomplete deflate data\n", stderr);
|
||||
break;
|
||||
case Z_MEM_ERROR:
|
||||
fputs("out of memory\n", stderr);
|
||||
break;
|
||||
case Z_VERSION_ERROR:
|
||||
fputs("zlib version mismatch!\n", stderr);
|
||||
}
|
||||
}
|
||||
|
||||
/* compress or decompress from stdin to stdout */
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int ret;
|
||||
|
||||
/* avoid end-of-line conversions */
|
||||
SET_BINARY_MODE(stdin);
|
||||
SET_BINARY_MODE(stdout);
|
||||
|
||||
/* do compression if no arguments */
|
||||
if (argc == 1) {
|
||||
ret = def(stdin, stdout, Z_DEFAULT_COMPRESSION);
|
||||
if (ret != Z_OK)
|
||||
zerr(ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* do decompression if -d specified */
|
||||
else if (argc == 2 && strcmp(argv[1], "-d") == 0) {
|
||||
ret = inf(stdin, stdout);
|
||||
if (ret != Z_OK)
|
||||
zerr(ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* otherwise, report usage */
|
||||
else {
|
||||
fputs("zpipe usage: zpipe [-d] < source > dest\n", stderr);
|
||||
return 1;
|
||||
}
|
||||
}
|
|
@ -0,0 +1,409 @@
|
|||
/* zran.c -- example of zlib/gzip stream indexing and random access
|
||||
* Copyright (C) 2005, 2012 Mark Adler
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
Version 1.1 29 Sep 2012 Mark Adler */
|
||||
|
||||
/* Version History:
|
||||
1.0 29 May 2005 First version
|
||||
1.1 29 Sep 2012 Fix memory reallocation error
|
||||
*/
|
||||
|
||||
/* Illustrate the use of Z_BLOCK, inflatePrime(), and inflateSetDictionary()
|
||||
for random access of a compressed file. A file containing a zlib or gzip
|
||||
stream is provided on the command line. The compressed stream is decoded in
|
||||
its entirety, and an index built with access points about every SPAN bytes
|
||||
in the uncompressed output. The compressed file is left open, and can then
|
||||
be read randomly, having to decompress on the average SPAN/2 uncompressed
|
||||
bytes before getting to the desired block of data.
|
||||
|
||||
An access point can be created at the start of any deflate block, by saving
|
||||
the starting file offset and bit of that block, and the 32K bytes of
|
||||
uncompressed data that precede that block. Also the uncompressed offset of
|
||||
that block is saved to provide a reference for locating a desired starting
|
||||
point in the uncompressed stream. build_index() works by decompressing the
|
||||
input zlib or gzip stream a block at a time, and at the end of each block
|
||||
deciding if enough uncompressed data has gone by to justify the creation of
|
||||
a new access point. If so, that point is saved in a data structure that
|
||||
grows as needed to accommodate the points.
|
||||
|
||||
To use the index, an offset in the uncompressed data is provided, for which
|
||||
the latest access point at or preceding that offset is located in the index.
|
||||
The input file is positioned to the specified location in the index, and if
|
||||
necessary the first few bits of the compressed data are read from the file.
|
||||
inflate is initialized with those bits and the 32K of uncompressed data, and
|
||||
the decompression then proceeds until the desired offset in the file is
|
||||
reached. Then the decompression continues to read the desired uncompressed
|
||||
data from the file.
|
||||
|
||||
Another approach would be to generate the index on demand. In that case,
|
||||
requests for random access reads from the compressed data would try to use
|
||||
the index, but if a read far enough past the end of the index is required,
|
||||
then further index entries would be generated and added.
|
||||
|
||||
There is some fair bit of overhead to starting inflation for the random
|
||||
access, mainly copying the 32K byte dictionary. So if small pieces of the
|
||||
file are being accessed, it would make sense to implement a cache to hold
|
||||
some lookahead and avoid many calls to extract() for small lengths.
|
||||
|
||||
Another way to build an index would be to use inflateCopy(). That would
|
||||
not be constrained to have access points at block boundaries, but requires
|
||||
more memory per access point, and also cannot be saved to file due to the
|
||||
use of pointers in the state. The approach here allows for storage of the
|
||||
index in a file.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "zlib.h"
|
||||
|
||||
#define local static
|
||||
|
||||
#define SPAN 1048576L /* desired distance between access points */
|
||||
#define WINSIZE 32768U /* sliding window size */
|
||||
#define CHUNK 16384 /* file input buffer size */
|
||||
|
||||
/* access point entry */
|
||||
struct point {
|
||||
off_t out; /* corresponding offset in uncompressed data */
|
||||
off_t in; /* offset in input file of first full byte */
|
||||
int bits; /* number of bits (1-7) from byte at in - 1, or 0 */
|
||||
unsigned char window[WINSIZE]; /* preceding 32K of uncompressed data */
|
||||
};
|
||||
|
||||
/* Growable list of access points */
struct access {
    /* number of list entries filled in */
    int have;
    /* number of list entries allocated */
    int size;
    /* the allocated array of entries */
    struct point *list;
};
|
||||
|
||||
/* Deallocate an index built by build_index() */
|
||||
local void free_index(struct access *index)
|
||||
{
|
||||
if (index != NULL) {
|
||||
free(index->list);
|
||||
free(index);
|
||||
}
|
||||
}
|
||||
|
||||
/* Add an entry to the access point list. If out of memory, deallocate the
|
||||
existing list and return NULL. */
|
||||
local struct access *addpoint(struct access *index, int bits,
|
||||
off_t in, off_t out, unsigned left, unsigned char *window)
|
||||
{
|
||||
struct point *next;
|
||||
|
||||
/* if list is empty, create it (start with eight points) */
|
||||
if (index == NULL) {
|
||||
index = malloc(sizeof(struct access));
|
||||
if (index == NULL) return NULL;
|
||||
index->list = malloc(sizeof(struct point) << 3);
|
||||
if (index->list == NULL) {
|
||||
free(index);
|
||||
return NULL;
|
||||
}
|
||||
index->size = 8;
|
||||
index->have = 0;
|
||||
}
|
||||
|
||||
/* if list is full, make it bigger */
|
||||
else if (index->have == index->size) {
|
||||
index->size <<= 1;
|
||||
next = realloc(index->list, sizeof(struct point) * index->size);
|
||||
if (next == NULL) {
|
||||
free_index(index);
|
||||
return NULL;
|
||||
}
|
||||
index->list = next;
|
||||
}
|
||||
|
||||
/* fill in entry and increment how many we have */
|
||||
next = index->list + index->have;
|
||||
next->bits = bits;
|
||||
next->in = in;
|
||||
next->out = out;
|
||||
if (left)
|
||||
memcpy(next->window, window + WINSIZE - left, left);
|
||||
if (left < WINSIZE)
|
||||
memcpy(next->window + left, window, WINSIZE - left);
|
||||
index->have++;
|
||||
|
||||
/* return list, possibly reallocated */
|
||||
return index;
|
||||
}
|
||||
|
||||
/* Make one entire pass through the compressed stream and build an index, with
|
||||
access points about every span bytes of uncompressed output -- span is
|
||||
chosen to balance the speed of random access against the memory requirements
|
||||
of the list, about 32K bytes per access point. Note that data after the end
|
||||
of the first zlib or gzip stream in the file is ignored. build_index()
|
||||
returns the number of access points on success (>= 1), Z_MEM_ERROR for out
|
||||
of memory, Z_DATA_ERROR for an error in the input file, or Z_ERRNO for a
|
||||
file read error. On success, *built points to the resulting index. */
|
||||
local int build_index(FILE *in, off_t span, struct access **built)
|
||||
{
|
||||
int ret;
|
||||
off_t totin, totout; /* our own total counters to avoid 4GB limit */
|
||||
off_t last; /* totout value of last access point */
|
||||
struct access *index; /* access points being generated */
|
||||
z_stream strm;
|
||||
unsigned char input[CHUNK];
|
||||
unsigned char window[WINSIZE];
|
||||
|
||||
/* initialize inflate */
|
||||
strm.zalloc = Z_NULL;
|
||||
strm.zfree = Z_NULL;
|
||||
strm.opaque = Z_NULL;
|
||||
strm.avail_in = 0;
|
||||
strm.next_in = Z_NULL;
|
||||
ret = inflateInit2(&strm, 47); /* automatic zlib or gzip decoding */
|
||||
if (ret != Z_OK)
|
||||
return ret;
|
||||
|
||||
/* inflate the input, maintain a sliding window, and build an index -- this
|
||||
also validates the integrity of the compressed data using the check
|
||||
information at the end of the gzip or zlib stream */
|
||||
totin = totout = last = 0;
|
||||
index = NULL; /* will be allocated by first addpoint() */
|
||||
strm.avail_out = 0;
|
||||
do {
|
||||
/* get some compressed data from input file */
|
||||
strm.avail_in = fread(input, 1, CHUNK, in);
|
||||
if (ferror(in)) {
|
||||
ret = Z_ERRNO;
|
||||
goto build_index_error;
|
||||
}
|
||||
if (strm.avail_in == 0) {
|
||||
ret = Z_DATA_ERROR;
|
||||
goto build_index_error;
|
||||
}
|
||||
strm.next_in = input;
|
||||
|
||||
/* process all of that, or until end of stream */
|
||||
do {
|
||||
/* reset sliding window if necessary */
|
||||
if (strm.avail_out == 0) {
|
||||
strm.avail_out = WINSIZE;
|
||||
strm.next_out = window;
|
||||
}
|
||||
|
||||
/* inflate until out of input, output, or at end of block --
|
||||
update the total input and output counters */
|
||||
totin += strm.avail_in;
|
||||
totout += strm.avail_out;
|
||||
ret = inflate(&strm, Z_BLOCK); /* return at end of block */
|
||||
totin -= strm.avail_in;
|
||||
totout -= strm.avail_out;
|
||||
if (ret == Z_NEED_DICT)
|
||||
ret = Z_DATA_ERROR;
|
||||
if (ret == Z_MEM_ERROR || ret == Z_DATA_ERROR)
|
||||
goto build_index_error;
|
||||
if (ret == Z_STREAM_END)
|
||||
break;
|
||||
|
||||
/* if at end of block, consider adding an index entry (note that if
|
||||
data_type indicates an end-of-block, then all of the
|
||||
uncompressed data from that block has been delivered, and none
|
||||
of the compressed data after that block has been consumed,
|
||||
except for up to seven bits) -- the totout == 0 provides an
|
||||
entry point after the zlib or gzip header, and assures that the
|
||||
index always has at least one access point; we avoid creating an
|
||||
access point after the last block by checking bit 6 of data_type
|
||||
*/
|
||||
if ((strm.data_type & 128) && !(strm.data_type & 64) &&
|
||||
(totout == 0 || totout - last > span)) {
|
||||
index = addpoint(index, strm.data_type & 7, totin,
|
||||
totout, strm.avail_out, window);
|
||||
if (index == NULL) {
|
||||
ret = Z_MEM_ERROR;
|
||||
goto build_index_error;
|
||||
}
|
||||
last = totout;
|
||||
}
|
||||
} while (strm.avail_in != 0);
|
||||
} while (ret != Z_STREAM_END);
|
||||
|
||||
/* clean up and return index (release unused entries in list) */
|
||||
(void)inflateEnd(&strm);
|
||||
index->list = realloc(index->list, sizeof(struct point) * index->have);
|
||||
index->size = index->have;
|
||||
*built = index;
|
||||
return index->size;
|
||||
|
||||
/* return error */
|
||||
build_index_error:
|
||||
(void)inflateEnd(&strm);
|
||||
if (index != NULL)
|
||||
free_index(index);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Use the index to read len bytes from offset into buf, return bytes read or
|
||||
negative for error (Z_DATA_ERROR or Z_MEM_ERROR). If data is requested past
|
||||
the end of the uncompressed data, then extract() will return a value less
|
||||
than len, indicating how much as actually read into buf. This function
|
||||
should not return a data error unless the file was modified since the index
|
||||
was generated. extract() may also return Z_ERRNO if there is an error on
|
||||
reading or seeking the input file. */
|
||||
local int extract(FILE *in, struct access *index, off_t offset,
|
||||
unsigned char *buf, int len)
|
||||
{
|
||||
int ret, skip;
|
||||
z_stream strm;
|
||||
struct point *here;
|
||||
unsigned char input[CHUNK];
|
||||
unsigned char discard[WINSIZE];
|
||||
|
||||
/* proceed only if something reasonable to do */
|
||||
if (len < 0)
|
||||
return 0;
|
||||
|
||||
/* find where in stream to start */
|
||||
here = index->list;
|
||||
ret = index->have;
|
||||
while (--ret && here[1].out <= offset)
|
||||
here++;
|
||||
|
||||
/* initialize file and inflate state to start there */
|
||||
strm.zalloc = Z_NULL;
|
||||
strm.zfree = Z_NULL;
|
||||
strm.opaque = Z_NULL;
|
||||
strm.avail_in = 0;
|
||||
strm.next_in = Z_NULL;
|
||||
ret = inflateInit2(&strm, -15); /* raw inflate */
|
||||
if (ret != Z_OK)
|
||||
return ret;
|
||||
ret = fseeko(in, here->in - (here->bits ? 1 : 0), SEEK_SET);
|
||||
if (ret == -1)
|
||||
goto extract_ret;
|
||||
if (here->bits) {
|
||||
ret = getc(in);
|
||||
if (ret == -1) {
|
||||
ret = ferror(in) ? Z_ERRNO : Z_DATA_ERROR;
|
||||
goto extract_ret;
|
||||
}
|
||||
(void)inflatePrime(&strm, here->bits, ret >> (8 - here->bits));
|
||||
}
|
||||
(void)inflateSetDictionary(&strm, here->window, WINSIZE);
|
||||
|
||||
/* skip uncompressed bytes until offset reached, then satisfy request */
|
||||
offset -= here->out;
|
||||
strm.avail_in = 0;
|
||||
skip = 1; /* while skipping to offset */
|
||||
do {
|
||||
/* define where to put uncompressed data, and how much */
|
||||
if (offset == 0 && skip) { /* at offset now */
|
||||
strm.avail_out = len;
|
||||
strm.next_out = buf;
|
||||
skip = 0; /* only do this once */
|
||||
}
|
||||
if (offset > WINSIZE) { /* skip WINSIZE bytes */
|
||||
strm.avail_out = WINSIZE;
|
||||
strm.next_out = discard;
|
||||
offset -= WINSIZE;
|
||||
}
|
||||
else if (offset != 0) { /* last skip */
|
||||
strm.avail_out = (unsigned)offset;
|
||||
strm.next_out = discard;
|
||||
offset = 0;
|
||||
}
|
||||
|
||||
/* uncompress until avail_out filled, or end of stream */
|
||||
do {
|
||||
if (strm.avail_in == 0) {
|
||||
strm.avail_in = fread(input, 1, CHUNK, in);
|
||||
if (ferror(in)) {
|
||||
ret = Z_ERRNO;
|
||||
goto extract_ret;
|
||||
}
|
||||
if (strm.avail_in == 0) {
|
||||
ret = Z_DATA_ERROR;
|
||||
goto extract_ret;
|
||||
}
|
||||
strm.next_in = input;
|
||||
}
|
||||
ret = inflate(&strm, Z_NO_FLUSH); /* normal inflate */
|
||||
if (ret == Z_NEED_DICT)
|
||||
ret = Z_DATA_ERROR;
|
||||
if (ret == Z_MEM_ERROR || ret == Z_DATA_ERROR)
|
||||
goto extract_ret;
|
||||
if (ret == Z_STREAM_END)
|
||||
break;
|
||||
} while (strm.avail_out != 0);
|
||||
|
||||
/* if reach end of stream, then don't keep trying to get more */
|
||||
if (ret == Z_STREAM_END)
|
||||
break;
|
||||
|
||||
/* do until offset reached and requested data read, or stream ends */
|
||||
} while (skip);
|
||||
|
||||
/* compute number of uncompressed bytes read after offset */
|
||||
ret = skip ? 0 : len - strm.avail_out;
|
||||
|
||||
/* clean up and return bytes read or error */
|
||||
extract_ret:
|
||||
(void)inflateEnd(&strm);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Demonstrate the use of build_index() and extract() by processing the file
|
||||
provided on the command line, and the extracting 16K from about 2/3rds of
|
||||
the way through the uncompressed output, and writing that to stdout. */
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int len;
|
||||
off_t offset;
|
||||
FILE *in;
|
||||
struct access *index = NULL;
|
||||
unsigned char buf[CHUNK];
|
||||
|
||||
/* open input file */
|
||||
if (argc != 2) {
|
||||
fprintf(stderr, "usage: zran file.gz\n");
|
||||
return 1;
|
||||
}
|
||||
in = fopen(argv[1], "rb");
|
||||
if (in == NULL) {
|
||||
fprintf(stderr, "zran: could not open %s for reading\n", argv[1]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* build index */
|
||||
len = build_index(in, SPAN, &index);
|
||||
if (len < 0) {
|
||||
fclose(in);
|
||||
switch (len) {
|
||||
case Z_MEM_ERROR:
|
||||
fprintf(stderr, "zran: out of memory\n");
|
||||
break;
|
||||
case Z_DATA_ERROR:
|
||||
fprintf(stderr, "zran: compressed data error in %s\n", argv[1]);
|
||||
break;
|
||||
case Z_ERRNO:
|
||||
fprintf(stderr, "zran: read error on %s\n", argv[1]);
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "zran: error %d while building index\n", len);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
fprintf(stderr, "zran: built index with %d access points\n", len);
|
||||
|
||||
/* use index by reading some bytes from an arbitrary offset */
|
||||
offset = (index->list[index->have - 1].out << 1) / 3;
|
||||
len = extract(in, index, offset, buf, CHUNK);
|
||||
if (len < 0)
|
||||
fprintf(stderr, "zran: extraction failed: %s error\n",
|
||||
len == Z_MEM_ERROR ? "out of memory" : "input corrupted");
|
||||
else {
|
||||
fwrite(buf, 1, len, stdout);
|
||||
fprintf(stderr, "zran: extracted %d bytes at %llu\n", len, offset);
|
||||
}
|
||||
|
||||
/* clean up and exit */
|
||||
free_index(index);
|
||||
fclose(in);
|
||||
return 0;
|
||||
}
|
|
@ -0,0 +1,409 @@
|
|||
/* zran.c -- example of zlib/gzip stream indexing and random access
|
||||
* Copyright (C) 2005, 2012 Mark Adler
|
||||
* For conditions of distribution and use, see copyright notice in zlib.h
|
||||
Version 1.1 29 Sep 2012 Mark Adler */
|
||||
|
||||
/* Version History:
|
||||
1.0 29 May 2005 First version
|
||||
1.1 29 Sep 2012 Fix memory reallocation error
|
||||
*/
|
||||
|
||||
/* Illustrate the use of Z_BLOCK, inflatePrime(), and inflateSetDictionary()
|
||||
for random access of a compressed file. A file containing a zlib or gzip
|
||||
stream is provided on the command line. The compressed stream is decoded in
|
||||
its entirety, and an index built with access points about every SPAN bytes
|
||||
in the uncompressed output. The compressed file is left open, and can then
|
||||
be read randomly, having to decompress on the average SPAN/2 uncompressed
|
||||
bytes before getting to the desired block of data.
|
||||
|
||||
An access point can be created at the start of any deflate block, by saving
|
||||
the starting file offset and bit of that block, and the 32K bytes of
|
||||
uncompressed data that precede that block. Also the uncompressed offset of
|
||||
that block is saved to provide a reference for locating a desired starting
|
||||
point in the uncompressed stream. build_index() works by decompressing the
|
||||
input zlib or gzip stream a block at a time, and at the end of each block
|
||||
deciding if enough uncompressed data has gone by to justify the creation of
|
||||
a new access point. If so, that point is saved in a data structure that
|
||||
grows as needed to accommodate the points.
|
||||
|
||||
To use the index, an offset in the uncompressed data is provided, for which
|
||||
the latest access point at or preceding that offset is located in the index.
|
||||
The input file is positioned to the specified location in the index, and if
|
||||
necessary the first few bits of the compressed data are read from the file.
|
||||
inflate is initialized with those bits and the 32K of uncompressed data, and
|
||||
the decompression then proceeds until the desired offset in the file is
|
||||
reached. Then the decompression continues to read the desired uncompressed
|
||||
data from the file.
|
||||
|
||||
Another approach would be to generate the index on demand. In that case,
|
||||
requests for random access reads from the compressed data would try to use
|
||||
the index, but if a read far enough past the end of the index is required,
|
||||
then further index entries would be generated and added.
|
||||
|
||||
There is some fair bit of overhead to starting inflation for the random
|
||||
access, mainly copying the 32K byte dictionary. So if small pieces of the
|
||||
file are being accessed, it would make sense to implement a cache to hold
|
||||
some lookahead and avoid many calls to extract() for small lengths.
|
||||
|
||||
Another way to build an index would be to use inflateCopy(). That would
|
||||
not be constrained to have access points at block boundaries, but requires
|
||||
more memory per access point, and also cannot be saved to file due to the
|
||||
use of pointers in the state. The approach here allows for storage of the
|
||||
index in a file.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "zlib.h"
|
||||
|
||||
#define local static
|
||||
|
||||
#define SPAN 1048576L /* desired distance between access points */
|
||||
#define WINSIZE 32768U /* sliding window size */
|
||||
#define CHUNK 16384 /* file input buffer size */
|
||||
|
||||
/* access point entry */
|
||||
struct point {
|
||||
off_t out; /* corresponding offset in uncompressed data */
|
||||
off_t in; /* offset in input file of first full byte */
|
||||
int bits; /* number of bits (1-7) from byte at in - 1, or 0 */
|
||||
unsigned char window[WINSIZE]; /* preceding 32K of uncompressed data */
|
||||
};
|
||||
|
||||
/* Growable list of access points */
struct access {
    /* number of list entries filled in */
    int have;
    /* number of list entries allocated */
    int size;
    /* the allocated array of entries */
    struct point *list;
};
|
||||
|
||||
/* Deallocate an index built by build_index() */
|
||||
local void free_index(struct access *index)
|
||||
{
|
||||
if (index != NULL) {
|
||||
free(index->list);
|
||||
free(index);
|
||||
}
|
||||
}
|
||||
|
||||
/* Add an entry to the access point list. If out of memory, deallocate the
|
||||
existing list and return NULL. */
|
||||
local struct access *addpoint(struct access *index, int bits,
|
||||
off_t in, off_t out, unsigned left, unsigned char *window)
|
||||
{
|
||||
struct point *next;
|
||||
|
||||
/* if list is empty, create it (start with eight points) */
|
||||
if (index == NULL) {
|
||||
index = malloc(sizeof(struct access));
|
||||
if (index == NULL) return NULL;
|
||||
index->list = malloc(sizeof(struct point) << 3);
|
||||
if (index->list == NULL) {
|
||||
free(index);
|
||||
return NULL;
|
||||
}
|
||||
index->size = 8;
|
||||
index->have = 0;
|
||||
}
|
||||
|
||||
/* if list is full, make it bigger */
|
||||
else if (index->have == index->size) {
|
||||
index->size <<= 1;
|
||||
next = realloc(index->list, sizeof(struct point) * index->size);
|
||||
if (next == NULL) {
|
||||
free_index(index);
|
||||
return NULL;
|
||||
}
|
||||
index->list = next;
|
||||
}
|
||||
|
||||
/* fill in entry and increment how many we have */
|
||||
next = index->list + index->have;
|
||||
next->bits = bits;
|
||||
next->in = in;
|
||||
next->out = out;
|
||||
if (left)
|
||||
memcpy(next->window, window + WINSIZE - left, left);
|
||||
if (left < WINSIZE)
|
||||
memcpy(next->window + left, window, WINSIZE - left);
|
||||
index->have++;
|
||||
|
||||
/* return list, possibly reallocated */
|
||||
return index;
|
||||
}
|
||||
|
||||
/* Make one entire pass through the compressed stream and build an index, with
|
||||
access points about every span bytes of uncompressed output -- span is
|
||||
chosen to balance the speed of random access against the memory requirements
|
||||
of the list, about 32K bytes per access point. Note that data after the end
|
||||
of the first zlib or gzip stream in the file is ignored. build_index()
|
||||
returns the number of access points on success (>= 1), Z_MEM_ERROR for out
|
||||
of memory, Z_DATA_ERROR for an error in the input file, or Z_ERRNO for a
|
||||
file read error. On success, *built points to the resulting index. */
|
||||
local int build_index(FILE *in, off_t span, struct access **built)
|
||||
{
|
||||
int ret;
|
||||
off_t totin, totout; /* our own total counters to avoid 4GB limit */
|
||||
off_t last; /* totout value of last access point */
|
||||
struct access *index; /* access points being generated */
|
||||
z_stream strm;
|
||||
unsigned char input[CHUNK];
|
||||
unsigned char window[WINSIZE];
|
||||
|
||||
/* initialize inflate */
|
||||
strm.zalloc = Z_NULL;
|
||||
strm.zfree = Z_NULL;
|
||||
strm.opaque = Z_NULL;
|
||||
strm.avail_in = 0;
|
||||
strm.next_in = Z_NULL;
|
||||
ret = inflateInit2(&strm, 47); /* automatic zlib or gzip decoding */
|
||||
if (ret != Z_OK)
|
||||
return ret;
|
||||
|
||||
/* inflate the input, maintain a sliding window, and build an index -- this
|
||||
also validates the integrity of the compressed data using the check
|
||||
information at the end of the gzip or zlib stream */
|
||||
totin = totout = last = 0;
|
||||
index = NULL; /* will be allocated by first addpoint() */
|
||||
strm.avail_out = 0;
|
||||
do {
|
||||
/* get some compressed data from input file */
|
||||
strm.avail_in = fread(input, 1, CHUNK, in);
|
||||
if (ferror(in)) {
|
||||
ret = Z_ERRNO;
|
||||
goto build_index_error;
|
||||
}
|
||||
if (strm.avail_in == 0) {
|
||||
ret = Z_DATA_ERROR;
|
||||
goto build_index_error;
|
||||
}
|
||||
strm.next_in = input;
|
||||
|
||||
/* process all of that, or until end of stream */
|
||||
do {
|
||||
/* reset sliding window if necessary */
|
||||
if (strm.avail_out == 0) {
|
||||
strm.avail_out = WINSIZE;
|
||||
strm.next_out = window;
|
||||
}
|
||||
|
||||
/* inflate until out of input, output, or at end of block --
|
||||
update the total input and output counters */
|
||||
totin += strm.avail_in;
|
||||
totout += strm.avail_out;
|
||||
ret = inflate(&strm, Z_BLOCK); /* return at end of block */
|
||||
totin -= strm.avail_in;
|
||||
totout -= strm.avail_out;
|
||||
if (ret == Z_NEED_DICT)
|
||||
ret = Z_DATA_ERROR;
|
||||
if (ret == Z_MEM_ERROR || ret == Z_DATA_ERROR)
|
||||
goto build_index_error;
|
||||
if (ret == Z_STREAM_END)
|
||||
break;
|
||||
|
||||
/* if at end of block, consider adding an index entry (note that if
|
||||
data_type indicates an end-of-block, then all of the
|
||||
uncompressed data from that block has been delivered, and none
|
||||
of the compressed data after that block has been consumed,
|
||||
except for up to seven bits) -- the totout == 0 provides an
|
||||
entry point after the zlib or gzip header, and assures that the
|
||||
index always has at least one access point; we avoid creating an
|
||||
access point after the last block by checking bit 6 of data_type
|
||||
*/
|
||||
if ((strm.data_type & 128) && !(strm.data_type & 64) &&
|
||||
(totout == 0 || totout - last > span)) {
|
||||
index = addpoint(index, strm.data_type & 7, totin,
|
||||
totout, strm.avail_out, window);
|
||||
if (index == NULL) {
|
||||
ret = Z_MEM_ERROR;
|
||||
goto build_index_error;
|
||||
}
|
||||
last = totout;
|
||||
}
|
||||
} while (strm.avail_in != 0);
|
||||
} while (ret != Z_STREAM_END);
|
||||
|
||||
/* clean up and return index (release unused entries in list) */
|
||||
(void)inflateEnd(&strm);
|
||||
index->list = realloc(index->list, sizeof(struct point) * index->have);
|
||||
index->size = index->have;
|
||||
*built = index;
|
||||
return index->size;
|
||||
|
||||
/* return error */
|
||||
build_index_error:
|
||||
(void)inflateEnd(&strm);
|
||||
if (index != NULL)
|
||||
free_index(index);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Use the index to read len bytes from offset into buf, return bytes read or
|
||||
negative for error (Z_DATA_ERROR or Z_MEM_ERROR). If data is requested past
|
||||
the end of the uncompressed data, then extract() will return a value less
|
||||
than len, indicating how much was actually read into buf. This function
|
||||
should not return a data error unless the file was modified since the index
|
||||
was generated. extract() may also return Z_ERRNO if there is an error on
|
||||
reading or seeking the input file. */
|
||||
local int extract(FILE *in, struct access *index, off_t offset,
|
||||
unsigned char *buf, int len)
|
||||
{
|
||||
int ret, skip;
|
||||
z_stream strm;
|
||||
struct point *here;
|
||||
unsigned char input[CHUNK];
|
||||
unsigned char discard[WINSIZE];
|
||||
|
||||
/* proceed only if something reasonable to do */
|
||||
if (len < 0)
|
||||
return 0;
|
||||
|
||||
/* find where in stream to start */
|
||||
here = index->list;
|
||||
ret = index->have;
|
||||
while (--ret && here[1].out <= offset)
|
||||
here++;
|
||||
|
||||
/* initialize file and inflate state to start there */
|
||||
strm.zalloc = Z_NULL;
|
||||
strm.zfree = Z_NULL;
|
||||
strm.opaque = Z_NULL;
|
||||
strm.avail_in = 0;
|
||||
strm.next_in = Z_NULL;
|
||||
ret = inflateInit2(&strm, -15); /* raw inflate */
|
||||
if (ret != Z_OK)
|
||||
return ret;
|
||||
ret = fseeko(in, here->in - (here->bits ? 1 : 0), SEEK_SET);
|
||||
if (ret == -1)
|
||||
goto extract_ret;
|
||||
if (here->bits) {
|
||||
ret = getc(in);
|
||||
if (ret == -1) {
|
||||
ret = ferror(in) ? Z_ERRNO : Z_DATA_ERROR;
|
||||
goto extract_ret;
|
||||
}
|
||||
(void)inflatePrime(&strm, here->bits, ret >> (8 - here->bits));
|
||||
}
|
||||
(void)inflateSetDictionary(&strm, here->window, WINSIZE);
|
||||
|
||||
/* skip uncompressed bytes until offset reached, then satisfy request */
|
||||
offset -= here->out;
|
||||
strm.avail_in = 0;
|
||||
skip = 1; /* while skipping to offset */
|
||||
do {
|
||||
/* define where to put uncompressed data, and how much */
|
||||
if (offset == 0 && skip) { /* at offset now */
|
||||
strm.avail_out = len;
|
||||
strm.next_out = buf;
|
||||
skip = 0; /* only do this once */
|
||||
}
|
||||
if (offset > WINSIZE) { /* skip WINSIZE bytes */
|
||||
strm.avail_out = WINSIZE;
|
||||
strm.next_out = discard;
|
||||
offset -= WINSIZE;
|
||||
}
|
||||
else if (offset != 0) { /* last skip */
|
||||
strm.avail_out = (unsigned)offset;
|
||||
strm.next_out = discard;
|
||||
offset = 0;
|
||||
}
|
||||
|
||||
/* uncompress until avail_out filled, or end of stream */
|
||||
do {
|
||||
if (strm.avail_in == 0) {
|
||||
strm.avail_in = fread(input, 1, CHUNK, in);
|
||||
if (ferror(in)) {
|
||||
ret = Z_ERRNO;
|
||||
goto extract_ret;
|
||||
}
|
||||
if (strm.avail_in == 0) {
|
||||
ret = Z_DATA_ERROR;
|
||||
goto extract_ret;
|
||||
}
|
||||
strm.next_in = input;
|
||||
}
|
||||
ret = inflate(&strm, Z_NO_FLUSH); /* normal inflate */
|
||||
if (ret == Z_NEED_DICT)
|
||||
ret = Z_DATA_ERROR;
|
||||
if (ret == Z_MEM_ERROR || ret == Z_DATA_ERROR)
|
||||
goto extract_ret;
|
||||
if (ret == Z_STREAM_END)
|
||||
break;
|
||||
} while (strm.avail_out != 0);
|
||||
|
||||
/* if reach end of stream, then don't keep trying to get more */
|
||||
if (ret == Z_STREAM_END)
|
||||
break;
|
||||
|
||||
/* do until offset reached and requested data read, or stream ends */
|
||||
} while (skip);
|
||||
|
||||
/* compute number of uncompressed bytes read after offset */
|
||||
ret = skip ? 0 : len - strm.avail_out;
|
||||
|
||||
/* clean up and return bytes read or error */
|
||||
extract_ret:
|
||||
(void)inflateEnd(&strm);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Demonstrate the use of build_index() and extract() by processing the file
|
||||
provided on the command line, and the extracting 16K from about 2/3rds of
|
||||
the way through the uncompressed output, and writing that to stdout. */
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int len;
|
||||
off_t offset;
|
||||
FILE *in;
|
||||
struct access *index = NULL;
|
||||
unsigned char buf[CHUNK];
|
||||
|
||||
/* open input file */
|
||||
if (argc != 2) {
|
||||
fprintf(stderr, "usage: zran file.gz\n");
|
||||
return 1;
|
||||
}
|
||||
in = fopen(argv[1], "rb");
|
||||
if (in == NULL) {
|
||||
fprintf(stderr, "zran: could not open %s for reading\n", argv[1]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* build index */
|
||||
len = build_index(in, SPAN, &index);
|
||||
if (len < 0) {
|
||||
fclose(in);
|
||||
switch (len) {
|
||||
case Z_MEM_ERROR:
|
||||
fprintf(stderr, "zran: out of memory\n");
|
||||
break;
|
||||
case Z_DATA_ERROR:
|
||||
fprintf(stderr, "zran: compressed data error in %s\n", argv[1]);
|
||||
break;
|
||||
case Z_ERRNO:
|
||||
fprintf(stderr, "zran: read error on %s\n", argv[1]);
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "zran: error %d while building index\n", len);
|
||||
}
|
||||
return 1;
|
||||
}
|
||||
fprintf(stderr, "zran: built index with %d access points\n", len);
|
||||
|
||||
/* use index by reading some bytes from an arbitrary offset */
|
||||
offset = (index->list[index->have - 1].out << 1) / 3;
|
||||
len = extract(in, index, offset, buf, CHUNK);
|
||||
if (len < 0)
|
||||
fprintf(stderr, "zran: extraction failed: %s error\n",
|
||||
len == Z_MEM_ERROR ? "out of memory" : "input corrupted");
|
||||
else {
|
||||
fwrite(buf, 1, len, stdout);
|
||||
fprintf(stderr, "zran: extracted %d bytes at %llu\n", len, offset);
|
||||
}
|
||||
|
||||
/* clean up and exit */
|
||||
free_index(index);
|
||||
fclose(in);
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue