gzip-iso: optimal small chunks sequential access

This commit is contained in:
Avi Halachmi (:avih) 2014-04-29 11:26:35 +03:00
parent 86a6fcddc0
commit 49505ab93f
2 changed files with 95 additions and 46 deletions

View File

@ -200,6 +200,7 @@ public:
m_pIndex(0),
m_cache(CACHE_SIZE_MB) {
m_blocksize = 2048;
m_zstate.isValid = 0;
};
// Destructor: all cleanup is delegated to Close().
virtual ~GzippedFileReader() { Close(); }
@ -230,6 +231,8 @@ private:
int _ReadSync(void* pBuffer, PX_off_t offset, uint bytesToRead);
int mBytesRead; // Temp sync read result when simulating async read
Access* m_pIndex; // Quick access index
Zstate m_zstate;
ChunksCache m_cache;
};
@ -309,17 +312,18 @@ int GzippedFileReader::ReadSync(void* pBuffer, uint sector, uint count) {
return _ReadSync(pBuffer, offset, bytesToRead);
}
#define SEQUENTIAL_CHUNK (256 * 1024)
/// Picks the chunk of uncompressed data to extract in order to serve a read at `offset`.
///
/// @param offset          Position of the requested read within the uncompressed data.
/// @param out_chunkStart  Receives `offset` rounded down to a multiple of SEQUENTIAL_CHUNK
///                        (for non-negative offsets, the start of the chunk containing it).
/// @param out_chunkLen    Receives the chunk length, which is always SEQUENTIAL_CHUNK.
void GzippedFileReader::GetOptimalChunkForExtraction(PX_off_t offset, PX_off_t& out_chunkStart, int& out_chunkLen) {
	// The optimal extraction size is m_pIndex->span for minimal extraction on continuous access.
	// However, to keep the index small, span has to be relatively big (e.g. 4M) and extracting it
	// in one go is sometimes more than games are willing to accept nicely.
	// But since sequential access is practically free (regardless of span) due to storing the state of the
	// decompressor (m_zstate), we always set the extraction chunk to be significantly smaller than span
	// (e.g. span = 4M and extract = 256k).
	// This results in the quickest possible sequential extraction and minimal time for random access.
	// The chunk geometry therefore depends only on SEQUENTIAL_CHUNK; the index is not consulted here.
	// TODO: cache also the extracted data between span boundary and SEQUENTIAL_CHUNK boundary on random.
	out_chunkStart = (PX_off_t)SEQUENTIAL_CHUNK * (offset / SEQUENTIAL_CHUNK);
	out_chunkLen = SEQUENTIAL_CHUNK;
}
int GzippedFileReader::_ReadSync(void* pBuffer, PX_off_t offset, uint bytesToRead) {
@ -354,9 +358,11 @@ int GzippedFileReader::_ReadSync(void* pBuffer, PX_off_t offset, uint bytesToRea
PTT s = NOW();
FILE* in = fopen(m_filename.ToUTF8(), "rb");
res = extract(in, m_pIndex, chunkStart, (unsigned char*)chunk, chunkLen);
res = extract(in, m_pIndex, chunkStart, (unsigned char*)chunk, chunkLen, &m_zstate);
fclose(in);
Console.WriteLn(Color_Gray, "gunzip: %1.1f MB - %d ms", (float)(chunkLen) / 1024 / 1024, (int)(NOW() - s));
int duration = NOW() - s;
if (duration > 10)
Console.WriteLn(Color_Gray, "gunzip: %1.2f MB - %d ms", (float)(chunkLen) / 1024 / 1024, duration);
if (res < 0)
return res;
@ -373,6 +379,11 @@ void GzippedFileReader::Close() {
free_index((Access*)m_pIndex);
m_pIndex = 0;
}
if (m_zstate.isValid) {
(void)inflateEnd(&m_zstate.strm);
m_zstate.isValid = false;
}
}

View File

@ -45,6 +45,8 @@ Comments) 1950 to 1952 in the files http://tools.ietf.org/html/rfc1950
- access: added members span and uncompressed_size which are filled by build_index.
- point and access packed for safety since they go to disk as is (but no endian-ness handling).
But they're still aligned since each member size is multiple of 4, so no perf issues.
- extract: added state import/export for instant sequential access regardless of index
(Thanks to Mark Adler for suggesting the approach)
- build_index(...) - added progress prints
- CHUNK changed from 16k to 512k
*/
@ -108,17 +110,19 @@ Comments) 1950 to 1952 in the files http://tools.ietf.org/html/rfc1950
#include <Pcsx2Types.h>
#ifdef WIN32
# define PX_fseeko _fseeki64
# define PX_ftello _ftelli64
# define PX_off_t s64 /* __int64 */
#else
# define PX_fseeko fseeko
# define PX_ftello ftello
# define PX_off_t off_t
#endif
#define local static
//#define SPAN (1048576L) /* desired distance between access points */
#define WINSIZE 32768U /* sliding window size */
/* File input buffer size. Defined exactly once: a second #define with a different
   value is an incompatible macro redefinition. */
#define CHUNK (64 * 1024)
#ifdef WIN32
# pragma pack(push, indexData, 1)
@ -325,6 +329,14 @@ local int build_index(FILE *in, PX_off_t span, struct access **built)
return ret;
}
/* Decompressor state which extract() can export at the end of a successful call and
   import at the start of the next one, so that a request starting exactly where the
   previous one ended can continue inflating without going through the access index.
   NOTE(review): the z_stream is transferred by plain struct assignment in extract();
   newer zlib releases appear to validate the address of the stream against its internal
   state, so confirm this copy-by-value approach against the zlib version in use. */
typedef struct zstate {
/* File position (PX_ftello) recorded just before the most recent fread() of compressed input. */
PX_off_t lastChunkFilepos;
/* Uncompressed offset right after the last extracted range (requested offset + len);
   the saved state is reused only when the next requested offset equals this value. */
PX_off_t nextOffset;
/* Distance of strm.next_in from the start of the input buffer after the last inflate() call;
   added to lastChunkFilepos to reposition the file when resuming. */
uint next_in_offset;
/* The saved inflate stream. While isValid is non-zero the holder of this struct is
   responsible for releasing it with inflateEnd(). */
z_stream strm;
/* Non-zero when strm holds a live inflate state which may be resumed (and must be released). */
int isValid;
} Zstate;
/* Use the index to read len bytes from offset into buf, return bytes read or
negative for error (Z_DATA_ERROR or Z_MEM_ERROR). If data is requested past
the end of the uncompressed data, then extract() will return a value less
@ -333,50 +345,67 @@ local int build_index(FILE *in, PX_off_t span, struct access **built)
was generated. extract() may also return Z_ERRNO if there is an error on
reading or seeking the input file. */
local int extract(FILE *in, struct access *index, PX_off_t offset,
unsigned char *buf, int len)
unsigned char *buf, int len, zstate *state = 0)
{
int ret, skip;
z_stream strm;
struct point *here;
unsigned char input[CHUNK];
unsigned char discard[WINSIZE];
PX_off_t orig_offset = offset;
/* proceed only if something reasonable to do */
if (len < 0)
return 0;
/* find where in stream to start */
here = index->list;
ret = index->have;
while (--ret && here[1].out <= offset)
here++;
/* initialize file and inflate state to start there */
strm.zalloc = Z_NULL;
strm.zfree = Z_NULL;
strm.opaque = Z_NULL;
strm.avail_in = 0;
strm.next_in = Z_NULL;
ret = inflateInit2(&strm, -15); /* raw inflate */
if (ret != Z_OK)
return ret;
ret = PX_fseeko(in, here->in - (here->bits ? 1 : 0), SEEK_SET);
if (ret == -1)
goto extract_ret;
if (here->bits) {
ret = getc(in);
if (ret == -1) {
ret = ferror(in) ? Z_ERRNO : Z_DATA_ERROR;
goto extract_ret;
}
(void)inflatePrime(&strm, here->bits, ret >> (8 - here->bits));
if (state && state->isValid && offset != state->nextOffset) {
// state doesn't match offset, free allocations before strm is overwritten
(void)inflateEnd(&state->strm);
state->isValid = 0;
}
if (state && state->isValid && offset == state->nextOffset) {
strm = state->strm;
state->isValid = 0; // we took control over strm. revalidate when/if we give it back
PX_fseeko(in, state->lastChunkFilepos + state->next_in_offset, SEEK_SET);
strm.avail_in = 0;
offset = 0;
skip = 1;
} else {
/* find where in stream to start */
here = index->list;
ret = index->have;
while (--ret && here[1].out <= offset)
here++;
/* initialize file and inflate state to start there */
strm.zalloc = Z_NULL;
strm.zfree = Z_NULL;
strm.opaque = Z_NULL;
strm.avail_in = 0;
strm.next_in = Z_NULL;
ret = inflateInit2(&strm, -15); /* raw inflate */
if (ret != Z_OK)
return ret;
ret = PX_fseeko(in, here->in - (here->bits ? 1 : 0), SEEK_SET);
if (ret == -1)
goto extract_ret;
if (here->bits) {
ret = getc(in);
if (ret == -1) {
ret = ferror(in) ? Z_ERRNO : Z_DATA_ERROR;
goto extract_ret;
}
(void)inflatePrime(&strm, here->bits, ret >> (8 - here->bits));
}
(void)inflateSetDictionary(&strm, here->window, WINSIZE);
/* skip uncompressed bytes until offset reached, then satisfy request */
offset -= here->out;
strm.avail_in = 0;
skip = 1; /* while skipping to offset */
}
(void)inflateSetDictionary(&strm, here->window, WINSIZE);
/* skip uncompressed bytes until offset reached, then satisfy request */
offset -= here->out;
strm.avail_in = 0;
skip = 1; /* while skipping to offset */
do {
/* define where to put uncompressed data, and how much */
if (offset == 0 && skip) { /* at offset now */
@ -398,6 +427,7 @@ local int extract(FILE *in, struct access *index, PX_off_t offset,
/* uncompress until avail_out filled, or end of stream */
do {
if (strm.avail_in == 0) {
state && (state->lastChunkFilepos = PX_ftello(in));
strm.avail_in = fread(input, 1, CHUNK, in);
if (ferror(in)) {
ret = Z_ERRNO;
@ -410,6 +440,7 @@ local int extract(FILE *in, struct access *index, PX_off_t offset,
strm.next_in = input;
}
ret = inflate(&strm, Z_NO_FLUSH); /* normal inflate */
state && (state->next_in_offset = strm.next_in - input);
if (ret == Z_NEED_DICT)
ret = Z_DATA_ERROR;
if (ret == Z_MEM_ERROR || ret == Z_DATA_ERROR)
@ -425,12 +456,19 @@ local int extract(FILE *in, struct access *index, PX_off_t offset,
/* do until offset reached and requested data read, or stream ends */
} while (skip);
int isEnd = ret == Z_STREAM_END;
/* compute number of uncompressed bytes read after offset */
ret = skip ? 0 : len - strm.avail_out;
/* clean up and return bytes read or error */
extract_ret:
(void)inflateEnd(&strm);
if (state && ret == len && !isEnd) {
state->nextOffset = orig_offset + len;
state->strm = strm;
state->isValid = 1;
} else
(void)inflateEnd(&strm);
return ret;
}