mirror of https://github.com/PCSX2/pcsx2.git
gzip-iso: Speedup some cases by using more memory.
Significant speedup in some cases, at the cost of roughly one additional index-size worth of RAM: usage is now up to roughly the cache size plus 2x the index size. The patch adds another index-like direct access point for each span we have visited. This replaces the single z-state previously used for sequential extraction; it serves the same purpose, but can now continue sequentially from most previously visited spans rather than only from the position of the last read.
This commit is contained in:
parent 8684596d66
commit d28669761f
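
In miniature, the idea is: keep one resumable zlib state per span, resume from it whenever a read lands at or past its output offset, and otherwise fall back to the nearest index point. A minimal sketch of that policy (hypothetical names, not the actual PCSX2 code):

    #include <cstdint>
    #include <vector>

    struct SpanState {
        bool valid = false;
        std::int64_t outOffset = 0; // uncompressed offset this state can resume from
    };

    // Pick where extraction should start for a read at `offset`, given
    // `span`-spaced index points and one saved state per span.
    std::int64_t PickExtractionStart(const std::vector<SpanState>& states,
                                     std::int64_t offset, std::int64_t span) {
        const SpanState& s = states[offset / span];
        if (s.valid && s.outOffset <= offset)
            return s.outOffset;        // resume sequentially: the cheap path
        return (offset / span) * span; // seek to the span's index point
    }
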
@@ -101,21 +101,21 @@ static void WriteIndexToFile(Access* index, const wxString filename) {
 }
 
 /////////// End of complementary utilities for zlib_indexed.c //////////
+#define CLAMP(val, minval, maxval) (std::min(maxval, std::max(minval, val)))
 
 class ChunksCache {
 public:
 	ChunksCache(uint initialLimitMb) : m_size(0), m_entries(0), m_limit(initialLimitMb * 1024 * 1024) {};
-	~ChunksCache() { SetLimit(0); };
+	~ChunksCache() { Clear(); };
 	void SetLimit(uint megabytes);
+	void Clear() { MatchLimit(true); };
 
 	void Take(void* pMallocedSrc, PX_off_t offset, int length, int coverage);
 	int Read(void* pDest, PX_off_t offset, int length);
 
 	static int CopyAvailable(void* pSrc, PX_off_t srcOffset, int srcSize,
 	                         void* pDst, PX_off_t dstOffset, int maxCopySize) {
-		int available = std::min(maxCopySize, (int)(srcOffset + srcSize - dstOffset));
-		if (available < 0)
-			available = 0;
+		int available = CLAMP(maxCopySize, 0, (int)(srcOffset + srcSize - dstOffset));
 		memcpy(pDst, (char*)pSrc + (dstOffset - srcOffset), available);
 		return available;
 	};
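
Worked example of the one-line clamp: with a source chunk at srcOffset = 1000 holding srcSize = 500 bytes, a request at dstOffset = 1200 for up to maxCopySize = 400 bytes gives srcOffset + srcSize - dstOffset = 300, so 300 bytes are copied starting at position 200 inside the chunk. One subtlety: CLAMP(val, 0, max) expands to std::min(max, std::max(0, val)), which returns max itself when max is negative, so unlike the old explicit `if (available < 0)` guard this form assumes dstOffset never falls past the end of the source chunk; whether that can occur depends on the callers, which is outside this excerpt. A standalone check of the arithmetic (plain C++, hypothetical helper, not the PCSX2 types):

    #include <algorithm>
    #include <cassert>
    #include <cstdint>

    // Same expression as the patched CopyAvailable (hypothetical free function).
    int Available(std::int64_t srcOffset, int srcSize, std::int64_t dstOffset, int maxCopySize) {
        return std::min((int)(srcOffset + srcSize - dstOffset), std::max(0, maxCopySize));
    }

    int main() {
        assert(Available(1000, 500, 1200, 400) == 300); // clipped by the chunk's end
        assert(Available(1000, 500, 1100, 100) == 100); // request fully inside the chunk
        return 0;
    }
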
@@ -127,7 +127,7 @@ private:
 			offset(offset),
 			size(length),
 			coverage(coverage)
 			{};
 
 		~CacheEntry() { if (data) free(data); };
 
@@ -138,7 +138,7 @@ private:
 	};
 
 	std::list<CacheEntry*> m_entries;
-	void MatchLimit();
+	void MatchLimit(bool removeAll = false);
 	PX_off_t m_size;
 	PX_off_t m_limit;
 };
@@ -148,9 +148,9 @@ void ChunksCache::SetLimit(uint megabytes) {
 	MatchLimit();
 }
 
-void ChunksCache::MatchLimit() {
+void ChunksCache::MatchLimit(bool removeAll) {
 	std::list<CacheEntry*>::reverse_iterator rit;
-	while (m_entries.size() && m_size > m_limit) {
+	while (m_entries.size() && (removeAll || m_size > m_limit)) {
 		rit = m_entries.rbegin();
 		m_size -= (*rit)->size;
 		delete(*rit);
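
The removeAll flag lets Clear() reuse the eviction loop rather than duplicating it: entries are evicted from the tail of the list until the budget is met, or until the list is empty when removeAll is set. The same pattern in a self-contained form (hypothetical minimal cache, not the PCSX2 class):

    #include <list>

    struct Entry { int size; };

    // Evict from the tail until under budget, or until empty when removeAll
    // is set: Clear() is just MatchLimit(true) over this shape of loop.
    void MatchLimit(std::list<Entry>& entries, long& total, long limit, bool removeAll = false) {
        while (!entries.empty() && (removeAll || total > limit)) {
            total -= entries.back().size;
            entries.pop_back();
        }
    }
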
@@ -190,7 +190,8 @@ static void WarnOldIndex(const wxString& filename) {
 }
 
 #define SPAN_DEFAULT (1048576L * 4) /* distance between direct access points when creating a new index */
-#define CACHE_SIZE_MB 200 /* max cache size for extracted data */
+#define READ_CHUNK_SIZE (256 * 1024) /* zlib extraction chunks size (at 0-based boundaries) */
+#define CACHE_SIZE_MB 200 /* cache size for extracted data, in MB. must be at least READ_CHUNK_SIZE */
 
 class GzippedFileReader : public AsyncFileReader
 {
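
Some rough numbers behind the commit message's "cache size plus 2x index size", assuming each direct access point stores a 32 KB inflate dictionary as in zlib's zran.c sample, which zlib_indexed appears to follow (an assumption, not stated in this diff):

    #include <cstdio>
    #include <cstdint>

    int main() {
        const std::int64_t span   = 1048576LL * 4; // SPAN_DEFAULT
        const std::int64_t image  = 4LL << 30;     // example: 4 GB uncompressed ISO
        const std::int64_t window = 32 * 1024;     // dictionary per access point (zran-style)
        const std::int64_t points = image / span;  // 1024 access points
        std::printf("index size  : %lld MB\n", (long long)((points * window) >> 20)); // ~32 MB
        std::printf("zstate slots: %lld\n", (long long)(2 + points));                 // 1026
        return 0;
    }

On top of that sits the extracted-data cache, capped at CACHE_SIZE_MB (200 MB).
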
@@ -198,9 +199,10 @@ class GzippedFileReader : public AsyncFileReader
 public:
 	GzippedFileReader(void) :
 		m_pIndex(0),
-		m_cache(CACHE_SIZE_MB) {
+		m_cache(CACHE_SIZE_MB),
+		m_src(0),
+		m_zstates(0) {
 		m_blocksize = 2048;
-		m_zstate.isValid = 0;
 	};
 
 	virtual ~GzippedFileReader(void) { Close(); };
@@ -222,20 +224,46 @@ public:
 		return (int)((m_pIndex ? m_pIndex->uncompressed_size : 0) / m_blocksize);
 	};
 
-	// Same as FlatFileReader, but in case it changes
 	virtual void SetBlockSize(uint bytes) { m_blocksize = bytes; }
 	virtual void SetDataOffset(uint bytes) { m_dataoffset = bytes; }
 private:
+	class Czstate {
+	public:
+		Czstate() { state.isValid = 0; };
+		~Czstate() { Kill(); };
+		void Kill() {
+			if (state.isValid)
+				inflateEnd(&state.strm);
+			state.isValid = 0;
+		}
+		Zstate state;
+	};
+
 	bool OkIndex(); // Verifies that we have an index, or tries to create one
-	void GetOptimalChunkForExtraction(PX_off_t offset, PX_off_t& out_chunkStart, int& out_chunkLen);
+	PX_off_t GetOptimalExtractionStart(PX_off_t offset);
 	int _ReadSync(void* pBuffer, PX_off_t offset, uint bytesToRead);
+	void InitZstates();
 
 	int mBytesRead; // Temp sync read result when simulating async read
 	Access* m_pIndex; // Quick access index
-	Zstate m_zstate;
+	Czstate* m_zstates;
+	FILE* m_src;
 
 	ChunksCache m_cache;
 };
+
+void GzippedFileReader::InitZstates() {
+	if (m_zstates) {
+		delete[] m_zstates;
+		m_zstates = 0;
+	}
+	if (!m_pIndex)
+		return;
+
+	// having another extra element helps avoid special-casing the last span (so 2+ instead of 1+)
+	int size = 2 + m_pIndex->uncompressed_size / m_pIndex->span;
+	m_zstates = new Czstate[size]();
+}
 
 // TODO: do better than just checking existence and extension
 bool GzippedFileReader::CanHandle(const wxString& fileName) {
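
The sizing in InitZstates deserves a note: span indices are computed as offset / span, so the largest index a plain lookup can produce is uncompressed_size / span, and 1 + uncompressed_size / span slots would cover lookups alone. The second extra element lets the state-migration code in _ReadSync (further down) write to slot (extractOffset + res) / span even when extraction ends exactly on a span boundary at EOF, with no bounds check. A worked instance (values hypothetical):

    #include <cstdint>

    int main() {
        const std::int64_t span = 4LL << 20;            // 4 MB
        const std::int64_t uncompressed = 4098LL << 20; // not an exact span multiple
        const int slots = 2 + (int)(uncompressed / span); // 2 + 1024 = 1026
        // valid lookup indices: 0 .. 1024; one spare slot for migration at EOF
        return slots == 1026 ? 0 : 1;
    }
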
@@ -257,6 +285,7 @@ bool GzippedFileReader::OkIndex() {
 		Console.Warning("It will work fine, but if you want to generate a new index with default intervals, delete this index file.");
 		Console.Warning("(smaller intervals mean bigger index file and quicker but more frequent decompressions)");
 	}
+	InitZstates();
 	return true;
 }
 
@@ -274,16 +303,18 @@ bool GzippedFileReader::OkIndex() {
 		WriteIndexToFile((Access*)m_pIndex, indexfile);
 	} else {
 		Console.Error("ERROR (%d): index could not be generated for file '%s'", len, (const char*)m_filename.To8BitData());
+		InitZstates();
 		return false;
 	}
 
+	InitZstates();
 	return true;
 }
 
 bool GzippedFileReader::Open(const wxString& fileName) {
 	Close();
 	m_filename = fileName;
-	if (!CanHandle(fileName) || !OkIndex()) {
+	if (!(m_src = fopen(m_filename.ToUTF8(), "rb")) || !CanHandle(fileName) || !OkIndex()) {
 		Close();
 		return false;
 	};
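
A side effect worth calling out: the compressed file is now opened once in Open() and the handle kept in m_src for the reader's lifetime, where the old code ran an fopen/fclose pair on every cache miss inside _ReadSync. Short-circuit evaluation also puts the handle in place before CanHandle() and OkIndex() run, and leaves m_src null on failure so Close() has something well-defined to test. A minimal model of the new lifetime (hypothetical names, not the PCSX2 class):

    #include <cstdio>

    // The handle is opened once and reused by every read, instead of a
    // fopen/fclose pair per cache miss as in the old _ReadSync.
    struct Reader {
        FILE* src = nullptr;
        bool Open(const char* path) {
            if (!(src = std::fopen(path, "rb"))) // evaluated before any other check
                return false;                    // src stays null: Close() is safe
            return true; // index checks would follow here, with src available
        }
        void Close() {
            if (src) { std::fclose(src); src = nullptr; }
        }
        ~Reader() { Close(); }
    };
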
@@ -309,68 +340,102 @@ int GzippedFileReader::FinishRead(void) {
 int GzippedFileReader::ReadSync(void* pBuffer, uint sector, uint count) {
 	PX_off_t offset = (s64)sector * m_blocksize + m_dataoffset;
 	int bytesToRead = count * m_blocksize;
-	return _ReadSync(pBuffer, offset, bytesToRead);
+	int res = _ReadSync(pBuffer, offset, bytesToRead);
+	if (res < 0)
+		Console.Error("Error: iso-gzip read unsuccessful.");
+	return res;
 }
 
-#define SEQUENTIAL_CHUNK (256 * 1024)
-void GzippedFileReader::GetOptimalChunkForExtraction(PX_off_t offset, PX_off_t& out_chunkStart, int& out_chunkLen) {
-	// The optimal extraction size is m_pIndex->span for minimal extraction on continuous access.
-	// However, to keep the index small, span has to be relatively big (e.g. 4M) and extracting it
-	// in one go is sometimes more than games are willing to accept nicely.
-	// But since sequential access is practically free (regardless of span) due to storing the state of the
-	// decompressor (m_zstate), we're always setting the extract chunk to significantly smaller than span
-	// (e.g. span = 4M and extract = 256k).
-	// This results is quickest possible sequential extraction and minimal time for random access.
-	// TODO: cache also the extracted data between span boundary and SEQUENTIAL_CHUNK boundary on random.
-	out_chunkStart = (PX_off_t)SEQUENTIAL_CHUNK * (offset / SEQUENTIAL_CHUNK);
-	out_chunkLen = SEQUENTIAL_CHUNK;
+// If we have a valid and adequate zstate for this span, use it; else, use the index
+PX_off_t GzippedFileReader::GetOptimalExtractionStart(PX_off_t offset) {
+	int span = m_pIndex->span;
+	Czstate& cstate = m_zstates[offset / span];
+	PX_off_t stateOffset = cstate.state.isValid ? cstate.state.out_offset : 0;
+	if (stateOffset && stateOffset <= offset)
+		return stateOffset; // resuming the state is faster than the index
+
+	// If span is not an exact multiple of READ_CHUNK_SIZE (because it was configured badly),
+	// we fall back to plain READ_CHUNK_SIZE boundaries
+	if (span % READ_CHUNK_SIZE)
+		return offset / READ_CHUNK_SIZE * READ_CHUNK_SIZE;
+
+	return span * (offset / span); // index direct access boundaries
 }
 
 int GzippedFileReader::_ReadSync(void* pBuffer, PX_off_t offset, uint bytesToRead) {
 	if (!OkIndex())
 		return -1;
 
-	//Make sure the request is inside a single optimal span, or split it otherwise to make it so
-	PX_off_t chunkStart; int chunkLen;
-	GetOptimalChunkForExtraction(offset, chunkStart, chunkLen);
-	uint maxInChunk = chunkStart + chunkLen - offset;
+	// Without all the caching, chunking and states, this would be enough:
+	// return extract(m_src, m_pIndex, offset, (unsigned char*)pBuffer, bytesToRead);
+
+	// Split the request into READ_CHUNK_SIZE chunks at READ_CHUNK_SIZE boundaries
+	uint maxInChunk = READ_CHUNK_SIZE - offset % READ_CHUNK_SIZE;
 	if (bytesToRead > maxInChunk) {
-		int res1 = _ReadSync(pBuffer, offset, maxInChunk);
-		if (res1 != maxInChunk)
-			return res1; // failure or EOF
+		int first = _ReadSync(pBuffer, offset, maxInChunk);
+		if (first != maxInChunk)
+			return first; // EOF or failure
 
-		int res2 = _ReadSync((char*)pBuffer + maxInChunk, offset + maxInChunk, bytesToRead - maxInChunk);
-		if (res2 < 0)
-			return res2;
+		int rest = _ReadSync((char*)pBuffer + maxInChunk, offset + maxInChunk, bytesToRead - maxInChunk);
+		if (rest < 0)
+			return rest;
 
-		return res1 + res2;
+		return first + rest;
 	}
 
-	// From here onwards it's guarenteed that the request is inside a single optimal extractable/cacheable span
+	// From here onwards it's guaranteed that the request is within a single READ_CHUNK_SIZE chunk
 
 	int res = m_cache.Read(pBuffer, offset, bytesToRead);
 	if (res >= 0)
 		return res;
 
-	// Not available from cache. Decompress a chunk with optimal boundaries
-	// which contains the request.
-	char* chunk = (char*)malloc(chunkLen);
+	// Not available from cache. Decompress from the optimal starting
+	// point in READ_CHUNK_SIZE chunks and cache each chunk.
 
 	PTT s = NOW();
-	FILE* in = fopen(m_filename.ToUTF8(), "rb");
-	res = extract(in, m_pIndex, chunkStart, (unsigned char*)chunk, chunkLen, &m_zstate);
-	fclose(in);
+	PX_off_t extractOffset = GetOptimalExtractionStart(offset); // guaranteed to be on a READ_CHUNK_SIZE boundary
+	int size = offset + maxInChunk - extractOffset;
+	unsigned char* extracted = (unsigned char*)malloc(size);
+
+	int span = m_pIndex->span;
+	int spanix = extractOffset / span;
+	res = extract(m_src, m_pIndex, extractOffset, extracted, size, &(m_zstates[spanix].state));
+	if (res < 0) {
+		free(extracted);
+		return res;
+	}
+	int copied = ChunksCache::CopyAvailable(extracted, extractOffset, res, pBuffer, offset, bytesToRead);
+
+	if (m_zstates[spanix].state.isValid && (extractOffset + res) / span != offset / span) {
+		// The state no longer matches this span.
+		// Move it to the appropriate span, since resuming it there is faster than using the index.
+		int targetix = (extractOffset + res) / span;
+		m_zstates[targetix].Kill();
+		m_zstates[targetix] = m_zstates[spanix]; // We have elements for the entire file, and another one.
+		m_zstates[spanix].state.isValid = 0; // Not killing because we need the state.
+	}
+
+	if (size <= READ_CHUNK_SIZE)
+		m_cache.Take(extracted, extractOffset, res, size);
+	else { // split into cacheable chunks
+		for (int i = 0; i < size; i += READ_CHUNK_SIZE) {
+			int available = CLAMP(res - i, 0, READ_CHUNK_SIZE);
+			void* chunk = available ? malloc(available) : 0;
+			if (available)
+				memcpy(chunk, extracted + i, available);
+			m_cache.Take(chunk, extractOffset + i, available, std::min(size - i, READ_CHUNK_SIZE));
+		}
+		free(extracted);
+	}
 
 	int duration = NOW() - s;
 	if (duration > 10)
-		Console.WriteLn(Color_Gray, "gunzip: %1.2f MB - %d ms", (float)(chunkLen) / 1024 / 1024, duration);
+		Console.WriteLn(Color_Gray, "gunzip: chunk #%5d-%2d : %1.2f MB - %d ms",
+		                (int)(offset / 4 / 1024 / 1024),
+		                (int)(offset % (4 * 1024 * 1024) / READ_CHUNK_SIZE),
+		                (float)size / 1024 / 1024,
+		                duration);
 
-	if (res < 0)
-		return res;
-
-	int available = ChunksCache::CopyAvailable(chunk, chunkStart, res, pBuffer, offset, bytesToRead);
-	m_cache.Take(chunk, chunkStart, res, chunkLen);
-
-	return available;
+	return copied;
 }
 
 void GzippedFileReader::Close() {
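
Two mechanisms interact in the rewritten _ReadSync. First, every request is normalized to READ_CHUNK_SIZE pieces by recursion: a 600 KB read at offset 100 KB becomes 156 KB (up to the first 256 KB boundary), then 256 KB, then 188 KB, each piece served from the cache or by one extraction. Second, after an extraction the live z_stream has advanced to extractOffset + res, which may belong to the next span; moving it into m_zstates[(extractOffset + res) / span] means the next sequential read finds a resumable state filed under its own span instead of falling back to the index. Note also that Take() records a coverage extent separately from the byte count, which appears to let chunks shortened at EOF be cached as covered even though fewer bytes are present. A standalone sketch of just the splitting recursion (hypothetical reader, not the PCSX2 class):

    #include <cstdint>

    constexpr std::int64_t kChunk = 256 * 1024;

    // Returns bytes read, < 0 on error; readChunk serves one boundary-aligned piece.
    template <typename F>
    std::int64_t ReadSplit(F&& readChunk, char* dst, std::int64_t offset, std::int64_t len) {
        std::int64_t maxInChunk = kChunk - offset % kChunk;
        if (len <= maxInChunk)
            return readChunk(dst, offset, len); // already within one chunk
        std::int64_t first = ReadSplit(readChunk, dst, offset, maxInChunk);
        if (first != maxInChunk)
            return first; // EOF or failure: stop early
        std::int64_t rest = ReadSplit(readChunk, dst + maxInChunk,
                                      offset + maxInChunk, len - maxInChunk);
        return rest < 0 ? rest : first + rest;
    }
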
@@ -380,9 +445,12 @@ void GzippedFileReader::Close() {
 		m_pIndex = 0;
 	}
 
-	if (m_zstate.isValid) {
-		(void)inflateEnd(&m_zstate.strm);
-		m_zstate.isValid = false;
+	InitZstates(); // results in delete because no index
+	m_cache.Clear();
+
+	if (m_src) {
+		fclose(m_src);
+		m_src = 0;
 	}
 }
 