gzip-iso: Speedup some cases by using more memory.

Significant speedup on some cases by using roughly another index size
in ram. The ram usage is now up to roughly cache size plus 2x index size.

This patch adds another index-like direct access point for each span we've
visited. This replaces the single z-state which was used for
sequential extraction, and does the same, but now it can continue
sequentially on most previously visited spans instead of only from
the last read.
This commit is contained in:
Avi Halachmi (:avih) 2014-05-02 04:23:14 +03:00
parent 8684596d66
commit d28669761f
1 changed files with 126 additions and 58 deletions

View File

@ -101,21 +101,21 @@ static void WriteIndexToFile(Access* index, const wxString filename) {
} }
/////////// End of complementary utilities for zlib_indexed.c ////////// /////////// End of complementary utilities for zlib_indexed.c //////////
#define CLAMP(val, minval, maxval) (std::min(maxval, std::max(minval, val)))
class ChunksCache { class ChunksCache {
public: public:
ChunksCache(uint initialLimitMb) : m_size(0), m_entries(0), m_limit(initialLimitMb * 1024 * 1024) {}; ChunksCache(uint initialLimitMb) : m_size(0), m_entries(0), m_limit(initialLimitMb * 1024 * 1024) {};
~ChunksCache() { SetLimit(0); }; ~ChunksCache() { Clear(); };
void SetLimit(uint megabytes); void SetLimit(uint megabytes);
void Clear() { MatchLimit(true); };
void Take(void* pMallocedSrc, PX_off_t offset, int length, int coverage); void Take(void* pMallocedSrc, PX_off_t offset, int length, int coverage);
int Read(void* pDest, PX_off_t offset, int length); int Read(void* pDest, PX_off_t offset, int length);
static int CopyAvailable(void* pSrc, PX_off_t srcOffset, int srcSize, static int CopyAvailable(void* pSrc, PX_off_t srcOffset, int srcSize,
void* pDst, PX_off_t dstOffset, int maxCopySize) { void* pDst, PX_off_t dstOffset, int maxCopySize) {
int available = std::min(maxCopySize, (int)(srcOffset + srcSize - dstOffset)); int available = CLAMP(maxCopySize, 0, (int)(srcOffset + srcSize - dstOffset));
if (available < 0)
available = 0;
memcpy(pDst, (char*)pSrc + (dstOffset - srcOffset), available); memcpy(pDst, (char*)pSrc + (dstOffset - srcOffset), available);
return available; return available;
}; };
@ -127,7 +127,7 @@ private:
offset(offset), offset(offset),
size(length), size(length),
coverage(coverage) coverage(coverage)
{}; {};
~CacheEntry() { if (data) free(data); }; ~CacheEntry() { if (data) free(data); };
@ -138,7 +138,7 @@ private:
}; };
std::list<CacheEntry*> m_entries; std::list<CacheEntry*> m_entries;
void MatchLimit(); void MatchLimit(bool removeAll = false);
PX_off_t m_size; PX_off_t m_size;
PX_off_t m_limit; PX_off_t m_limit;
}; };
@ -148,9 +148,9 @@ void ChunksCache::SetLimit(uint megabytes) {
MatchLimit(); MatchLimit();
} }
void ChunksCache::MatchLimit() { void ChunksCache::MatchLimit(bool removeAll) {
std::list<CacheEntry*>::reverse_iterator rit; std::list<CacheEntry*>::reverse_iterator rit;
while (m_entries.size() && m_size > m_limit) { while (m_entries.size() && (removeAll || m_size > m_limit)) {
rit = m_entries.rbegin(); rit = m_entries.rbegin();
m_size -= (*rit)->size; m_size -= (*rit)->size;
delete(*rit); delete(*rit);
@ -190,7 +190,8 @@ static void WarnOldIndex(const wxString& filename) {
} }
#define SPAN_DEFAULT (1048576L * 4) /* distance between direct access points when creating a new index */ #define SPAN_DEFAULT (1048576L * 4) /* distance between direct access points when creating a new index */
#define CACHE_SIZE_MB 200 /* max cache size for extracted data */ #define READ_CHUNK_SIZE (256 * 1024) /* zlib extraction chunks size (at 0-based boundaries) */
#define CACHE_SIZE_MB 200 /* cache size for extracted data. must be at least READ_CHUNK_SIZE (in MB)*/
class GzippedFileReader : public AsyncFileReader class GzippedFileReader : public AsyncFileReader
{ {
@ -198,9 +199,10 @@ class GzippedFileReader : public AsyncFileReader
public: public:
GzippedFileReader(void) : GzippedFileReader(void) :
m_pIndex(0), m_pIndex(0),
m_cache(CACHE_SIZE_MB) { m_cache(CACHE_SIZE_MB),
m_src(0),
m_zstates(0) {
m_blocksize = 2048; m_blocksize = 2048;
m_zstate.isValid = 0;
}; };
virtual ~GzippedFileReader(void) { Close(); }; virtual ~GzippedFileReader(void) { Close(); };
@ -222,20 +224,46 @@ public:
return (int)((m_pIndex ? m_pIndex->uncompressed_size : 0) / m_blocksize); return (int)((m_pIndex ? m_pIndex->uncompressed_size : 0) / m_blocksize);
}; };
// Same as FlatFileReader, but in case it changes
virtual void SetBlockSize(uint bytes) { m_blocksize = bytes; } virtual void SetBlockSize(uint bytes) { m_blocksize = bytes; }
virtual void SetDataOffset(uint bytes) { m_dataoffset = bytes; } virtual void SetDataOffset(uint bytes) { m_dataoffset = bytes; }
private: private:
class Czstate {
public:
Czstate() { state.isValid = 0; };
~Czstate() { Kill(); };
void Kill() {
if (state.isValid)
inflateEnd(&state.strm);
state.isValid = 0;
}
Zstate state;
};
bool OkIndex(); // Verifies that we have an index, or try to create one bool OkIndex(); // Verifies that we have an index, or try to create one
void GetOptimalChunkForExtraction(PX_off_t offset, PX_off_t& out_chunkStart, int& out_chunkLen); PX_off_t GetOptimalExtractionStart(PX_off_t offset);
int _ReadSync(void* pBuffer, PX_off_t offset, uint bytesToRead); int _ReadSync(void* pBuffer, PX_off_t offset, uint bytesToRead);
void InitZstates();
int mBytesRead; // Temp sync read result when simulating async read int mBytesRead; // Temp sync read result when simulating async read
Access* m_pIndex; // Quick access index Access* m_pIndex; // Quick access index
Zstate m_zstate; Czstate* m_zstates;
FILE* m_src;
ChunksCache m_cache; ChunksCache m_cache;
}; };
void GzippedFileReader::InitZstates() {
if (m_zstates) {
delete[] m_zstates;
m_zstates = 0;
}
if (!m_pIndex)
return;
// having another extra element helps avoiding logic for last (so 2+ instead of 1+)
int size = 2 + m_pIndex->uncompressed_size / m_pIndex->span;
m_zstates = new Czstate[size]();
}
// TODO: do better than just checking existance and extension // TODO: do better than just checking existance and extension
bool GzippedFileReader::CanHandle(const wxString& fileName) { bool GzippedFileReader::CanHandle(const wxString& fileName) {
@ -257,6 +285,7 @@ bool GzippedFileReader::OkIndex() {
Console.Warning("It will work fine, but if you want to generate a new index with default intervals, delete this index file."); Console.Warning("It will work fine, but if you want to generate a new index with default intervals, delete this index file.");
Console.Warning("(smaller intervals mean bigger index file and quicker but more frequent decompressions)"); Console.Warning("(smaller intervals mean bigger index file and quicker but more frequent decompressions)");
} }
InitZstates();
return true; return true;
} }
@ -274,16 +303,18 @@ bool GzippedFileReader::OkIndex() {
WriteIndexToFile((Access*)m_pIndex, indexfile); WriteIndexToFile((Access*)m_pIndex, indexfile);
} else { } else {
Console.Error("ERROR (%d): index could not be generated for file '%s'", len, (const char*)m_filename.To8BitData()); Console.Error("ERROR (%d): index could not be generated for file '%s'", len, (const char*)m_filename.To8BitData());
InitZstates();
return false; return false;
} }
InitZstates();
return true; return true;
} }
bool GzippedFileReader::Open(const wxString& fileName) { bool GzippedFileReader::Open(const wxString& fileName) {
Close(); Close();
m_filename = fileName; m_filename = fileName;
if (!CanHandle(fileName) || !OkIndex()) { if (!(m_src = fopen(m_filename.ToUTF8(), "rb")) || !CanHandle(fileName) || !OkIndex()) {
Close(); Close();
return false; return false;
}; };
@ -309,68 +340,102 @@ int GzippedFileReader::FinishRead(void) {
int GzippedFileReader::ReadSync(void* pBuffer, uint sector, uint count) { int GzippedFileReader::ReadSync(void* pBuffer, uint sector, uint count) {
PX_off_t offset = (s64)sector * m_blocksize + m_dataoffset; PX_off_t offset = (s64)sector * m_blocksize + m_dataoffset;
int bytesToRead = count * m_blocksize; int bytesToRead = count * m_blocksize;
return _ReadSync(pBuffer, offset, bytesToRead); int res = _ReadSync(pBuffer, offset, bytesToRead);
if (res < 0)
Console.Error("Error: iso-gzip read unsuccessful.");
return res;
} }
#define SEQUENTIAL_CHUNK (256 * 1024) // If we have a valid and adequate zstate for this span, use it, else, use the index
void GzippedFileReader::GetOptimalChunkForExtraction(PX_off_t offset, PX_off_t& out_chunkStart, int& out_chunkLen) { PX_off_t GzippedFileReader::GetOptimalExtractionStart(PX_off_t offset) {
// The optimal extraction size is m_pIndex->span for minimal extraction on continuous access. int span = m_pIndex->span;
// However, to keep the index small, span has to be relatively big (e.g. 4M) and extracting it Czstate& cstate = m_zstates[offset / span];
// in one go is sometimes more than games are willing to accept nicely. PX_off_t stateOffset = cstate.state.isValid ? cstate.state.out_offset : 0;
// But since sequential access is practically free (regardless of span) due to storing the state of the if (stateOffset && stateOffset <= offset)
// decompressor (m_zstate), we're always setting the extract chunk to significantly smaller than span return stateOffset; // state is faster than indexed
// (e.g. span = 4M and extract = 256k).
// This results is quickest possible sequential extraction and minimal time for random access. // If span is not exact multiples of READ_CHUNK_SIZE (because it was configured badly),
// TODO: cache also the extracted data between span boundary and SEQUENTIAL_CHUNK boundary on random. // we fallback to always READ_CHUNK_SIZE boundaries
out_chunkStart = (PX_off_t)SEQUENTIAL_CHUNK * (offset / SEQUENTIAL_CHUNK); if (span % READ_CHUNK_SIZE)
out_chunkLen = SEQUENTIAL_CHUNK; return offset / READ_CHUNK_SIZE * READ_CHUNK_SIZE;
return span * (offset / span); // index direct access boundaries
} }
int GzippedFileReader::_ReadSync(void* pBuffer, PX_off_t offset, uint bytesToRead) { int GzippedFileReader::_ReadSync(void* pBuffer, PX_off_t offset, uint bytesToRead) {
if (!OkIndex()) if (!OkIndex())
return -1; return -1;
//Make sure the request is inside a single optimal span, or split it otherwise to make it so // Without all the caching, chunking and states, this would be enough:
PX_off_t chunkStart; int chunkLen; // return extract(m_src, m_pIndex, offset, (unsigned char*)pBuffer, bytesToRead);
GetOptimalChunkForExtraction(offset, chunkStart, chunkLen);
uint maxInChunk = chunkStart + chunkLen - offset; // Split request to READ_CHUNK_SIZE chunks at READ_CHUNK_SIZE boundaries
uint maxInChunk = READ_CHUNK_SIZE - offset % READ_CHUNK_SIZE;
if (bytesToRead > maxInChunk) { if (bytesToRead > maxInChunk) {
int res1 = _ReadSync(pBuffer, offset, maxInChunk); int first = _ReadSync(pBuffer, offset, maxInChunk);
if (res1 != maxInChunk) if (first != maxInChunk)
return res1; // failure or EOF return first; // EOF or failure
int res2 = _ReadSync((char*)pBuffer + maxInChunk, offset + maxInChunk, bytesToRead - maxInChunk); int rest = _ReadSync((char*)pBuffer + maxInChunk, offset + maxInChunk, bytesToRead - maxInChunk);
if (res2 < 0) if (rest < 0)
return res2; return rest;
return res1 + res2; return first + rest;
} }
// From here onwards it's guarenteed that the request is inside a single optimal extractable/cacheable span // From here onwards it's guarenteed that the request is inside a single READ_CHUNK_SIZE boundaries
int res = m_cache.Read(pBuffer, offset, bytesToRead); int res = m_cache.Read(pBuffer, offset, bytesToRead);
if (res >= 0) if (res >= 0)
return res; return res;
// Not available from cache. Decompress a chunk with optimal boundaries // Not available from cache. Decompress from optimal starting
// which contains the request. // point in READ_CHUNK_SIZE chunks and cache each chunk.
char* chunk = (char*)malloc(chunkLen);
PTT s = NOW(); PTT s = NOW();
FILE* in = fopen(m_filename.ToUTF8(), "rb"); PX_off_t extractOffset = GetOptimalExtractionStart(offset); // guaranteed in READ_CHUNK_SIZE boundaries
res = extract(in, m_pIndex, chunkStart, (unsigned char*)chunk, chunkLen, &m_zstate); int size = offset + maxInChunk - extractOffset;
fclose(in); unsigned char* extracted = (unsigned char*)malloc(size);
int span = m_pIndex->span;
int spanix = extractOffset / span;
res = extract(m_src, m_pIndex, extractOffset, extracted, size, &(m_zstates[spanix].state));
if (res < 0) {
free(extracted);
return res;
}
int copied = ChunksCache::CopyAvailable(extracted, extractOffset, res, pBuffer, offset, bytesToRead);
if (m_zstates[spanix].state.isValid && (extractOffset + res) / span != offset / span) {
// The state no longer matches this span.
// move the state to the appropriate span because it will be faster than using the index
int targetix = (extractOffset + res) / span;
m_zstates[targetix].Kill();
m_zstates[targetix] = m_zstates[spanix]; // We have elements for the entire file, and another one.
m_zstates[spanix].state.isValid = 0; // Not killing because we need the state.
}
if (size <= READ_CHUNK_SIZE)
m_cache.Take(extracted, extractOffset, res, size);
else { // split into cacheable chunks
for (int i = 0; i < size; i += READ_CHUNK_SIZE) {
int available = CLAMP(res - i, 0, READ_CHUNK_SIZE);
void* chunk = available ? malloc(available) : 0;
if (available)
memcpy(chunk, extracted + i, available);
m_cache.Take(chunk, extractOffset + i, available, std::min(size - i, READ_CHUNK_SIZE));
}
free(extracted);
}
int duration = NOW() - s; int duration = NOW() - s;
if (duration > 10) if (duration > 10)
Console.WriteLn(Color_Gray, "gunzip: %1.2f MB - %d ms", (float)(chunkLen) / 1024 / 1024, duration); Console.WriteLn(Color_Gray, "gunzip: chunk #%5d-%2d : %1.2f MB - %d ms",
(int)(offset / 4 / 1024 / 1024),
(int)(offset % (4 * 1024 * 1024) / READ_CHUNK_SIZE),
(float)size / 1024 / 1024,
duration);
if (res < 0) return copied;
return res;
int available = ChunksCache::CopyAvailable(chunk, chunkStart, res, pBuffer, offset, bytesToRead);
m_cache.Take(chunk, chunkStart, res, chunkLen);
return available;
} }
void GzippedFileReader::Close() { void GzippedFileReader::Close() {
@ -380,9 +445,12 @@ void GzippedFileReader::Close() {
m_pIndex = 0; m_pIndex = 0;
} }
if (m_zstate.isValid) { InitZstates(); // results in delete because no index
(void)inflateEnd(&m_zstate.strm); m_cache.Clear();
m_zstate.isValid = false;
if (m_src) {
fclose(m_src);
m_src = 0;
} }
} }