gzip-iso: Speedup some cases by using more memory.

Significant speedup on some cases by using roughly another index size in ram. The ram usage is now up to roughly cache size plus 2x index size. This patch adds another index-like direct access point for each span we've visited. This replaces the single z-state which was used for sequential extraction, and does the same, but now it can continue sequentially on most previously visited spans instead of only from the last read.
2014-05-02 04:23:14 +03:00 · 2014-05-02 04:23:14 +03:00 · d28669761f
parent 8684596d66
commit d28669761f
1 changed files with 126 additions and 58 deletions
--- a/pcsx2/CDVD/CompressedFileReader.cpp
+++ b/pcsx2/CDVD/CompressedFileReader.cpp
@ -101,21 +101,21 @@ static void WriteIndexToFile(Access* index, const wxString filename) {
 }
 /////////// End of complementary utilities for zlib_indexed.c //////////
 #define CLAMP(val, minval, maxval) (std::min(maxval, std::max(minval, val)))
 class ChunksCache {
 public:
 	ChunksCache(uint initialLimitMb) : m_size(0), m_entries(0), m_limit(initialLimitMb * 1024 * 1024) {};
-	~ChunksCache() { SetLimit(0); };
+	~ChunksCache() { Clear(); };
 	void SetLimit(uint megabytes);
 	void Clear() { MatchLimit(true); };
 	void Take(void* pMallocedSrc, PX_off_t offset, int length, int coverage);
 	int  Read(void* pDest,        PX_off_t offset, int length);
 	static int CopyAvailable(void* pSrc, PX_off_t srcOffset, int srcSize,
 							 void* pDst, PX_off_t dstOffset, int maxCopySize) {
-		int available = std::min(maxCopySize, (int)(srcOffset + srcSize - dstOffset));
+		int available = CLAMP(maxCopySize, 0, (int)(srcOffset + srcSize - dstOffset));
 		if (available < 0)
 			available = 0;
 		memcpy(pDst, (char*)pSrc + (dstOffset - srcOffset), available);
 		return available;
 	};
@ -127,7 +127,7 @@ private:
 			offset(offset),
 			size(length),
 			coverage(coverage)
-			{};
+		{};
 		~CacheEntry() { if (data) free(data); };
@ -138,7 +138,7 @@ private:
 	};
 	std::list<CacheEntry*> m_entries;
-	void MatchLimit();
+	void MatchLimit(bool removeAll = false);
 	PX_off_t m_size;
 	PX_off_t m_limit;
 };
@ -148,9 +148,9 @@ void ChunksCache::SetLimit(uint megabytes) {
 	MatchLimit();
 }
-void ChunksCache::MatchLimit() {
+void ChunksCache::MatchLimit(bool removeAll) {
 	std::list<CacheEntry*>::reverse_iterator rit;
-	while (m_entries.size() && m_size > m_limit) {
+	while (m_entries.size() && (removeAll || m_size > m_limit)) {
 		rit = m_entries.rbegin();
 		m_size -= (*rit)->size;
 		delete(*rit);
@ -190,7 +190,8 @@ static void WarnOldIndex(const wxString& filename) {
 }
 #define SPAN_DEFAULT (1048576L * 4)   /* distance between direct access points when creating a new index */
-#define CACHE_SIZE_MB 200             /* max cache size for extracted data */
+#define READ_CHUNK_SIZE (256 * 1024)  /* zlib extraction chunks size (at 0-based boundaries) */
 #define CACHE_SIZE_MB 200             /* cache size for extracted data. must be at least READ_CHUNK_SIZE (in MB)*/
 class GzippedFileReader : public AsyncFileReader
 {
@ -198,9 +199,10 @@ class GzippedFileReader : public AsyncFileReader
 public:
 	GzippedFileReader(void) :
 		m_pIndex(0),
-		m_cache(CACHE_SIZE_MB) {
+		m_cache(CACHE_SIZE_MB),
 		m_src(0),
 		m_zstates(0) {
 		m_blocksize = 2048;
 		m_zstate.isValid = 0;
 	};
 	virtual ~GzippedFileReader(void) { Close(); };
@ -222,20 +224,46 @@ public:
 		return (int)((m_pIndex ? m_pIndex->uncompressed_size : 0) / m_blocksize);
 	};
 	// Same as FlatFileReader, but in case it changes
 	virtual void SetBlockSize(uint bytes) { m_blocksize = bytes; }
 	virtual void SetDataOffset(uint bytes) { m_dataoffset = bytes; }
 private:
 	class Czstate {
 	public:
 		Czstate() { state.isValid = 0; };
 		~Czstate() { Kill(); };
 		void Kill() {
 			if (state.isValid)
 				inflateEnd(&state.strm);
 			state.isValid = 0;
 		}
 		Zstate state;
 	};
 	bool	OkIndex();  // Verifies that we have an index, or try to create one
-	void	GetOptimalChunkForExtraction(PX_off_t offset, PX_off_t& out_chunkStart, int& out_chunkLen);
+	PX_off_t GetOptimalExtractionStart(PX_off_t offset);
 	int     _ReadSync(void* pBuffer, PX_off_t offset, uint bytesToRead);
 	void	InitZstates();
 	int		mBytesRead; // Temp sync read result when simulating async read
 	Access* m_pIndex;   // Quick access index
-	Zstate  m_zstate;
+	Czstate* m_zstates;
 	FILE*	m_src;
 	ChunksCache m_cache;
 };
 void GzippedFileReader::InitZstates() {
 	if (m_zstates) {
 		delete[] m_zstates;
 		m_zstates = 0;
 	}
 	if (!m_pIndex)
 		return;
 	// having another extra element helps avoiding logic for last (so 2+ instead of 1+)
 	int size = 2 + m_pIndex->uncompressed_size / m_pIndex->span;
 	m_zstates = new Czstate[size]();
 }
 // TODO: do better than just checking existance and extension
 bool GzippedFileReader::CanHandle(const wxString& fileName) {
@ -257,6 +285,7 @@ bool GzippedFileReader::OkIndex() {
 			Console.Warning("It will work fine, but if you want to generate a new index with default intervals, delete this index file.");
 			Console.Warning("(smaller intervals mean bigger index file and quicker but more frequent decompressions)");
 		}
 		InitZstates();
 		return true;
 	}
@ -274,16 +303,18 @@ bool GzippedFileReader::OkIndex() {
 		WriteIndexToFile((Access*)m_pIndex, indexfile);
 	} else {
 		Console.Error("ERROR (%d): index could not be generated for file '%s'", len, (const char*)m_filename.To8BitData());
 		InitZstates();
 		return false;
 	}
 	InitZstates();
 	return true;
 }
 bool GzippedFileReader::Open(const wxString& fileName) {
 	Close();
 	m_filename = fileName;
-	if (!CanHandle(fileName) || !OkIndex()) {
+	if (!(m_src = fopen(m_filename.ToUTF8(), "rb")) || !CanHandle(fileName) || !OkIndex()) {
 		Close();
 		return false;
 	};
@ -309,68 +340,102 @@ int GzippedFileReader::FinishRead(void) {
 int GzippedFileReader::ReadSync(void* pBuffer, uint sector, uint count) {
 	PX_off_t offset = (s64)sector * m_blocksize + m_dataoffset;
 	int bytesToRead = count * m_blocksize;
-	return _ReadSync(pBuffer, offset, bytesToRead);
+	int res = _ReadSync(pBuffer, offset, bytesToRead);
 	if (res < 0)
 		Console.Error("Error: iso-gzip read unsuccessful.");
 	return res;
 }
-#define SEQUENTIAL_CHUNK (256 * 1024)
+// If we have a valid and adequate zstate for this span, use it, else, use the index
-void GzippedFileReader::GetOptimalChunkForExtraction(PX_off_t offset, PX_off_t& out_chunkStart, int& out_chunkLen) {
+PX_off_t GzippedFileReader::GetOptimalExtractionStart(PX_off_t offset) {
-	// The optimal extraction size is m_pIndex->span for minimal extraction on continuous access.
+	int span = m_pIndex->span;
-	// However, to keep the index small, span has to be relatively big (e.g. 4M) and extracting it
+	Czstate& cstate = m_zstates[offset / span];
-	// in one go is sometimes more than games are willing to accept nicely.
+	PX_off_t stateOffset = cstate.state.isValid ? cstate.state.out_offset : 0;
-	// But since sequential access is practically free (regardless of span) due to storing the state of the
+	if (stateOffset && stateOffset <= offset)
-	// decompressor (m_zstate), we're always setting the extract chunk to significantly smaller than span
+		return stateOffset; // state is faster than indexed
-	// (e.g. span = 4M and extract = 256k).
+
-	// This results is quickest possible sequential extraction and minimal time for random access.
+	// If span is not exact multiples of READ_CHUNK_SIZE (because it was configured badly),
-	// TODO: cache also the extracted data between span boundary and SEQUENTIAL_CHUNK boundary on random.
+	// we fallback to always READ_CHUNK_SIZE boundaries
-	out_chunkStart = (PX_off_t)SEQUENTIAL_CHUNK * (offset / SEQUENTIAL_CHUNK);
+	if (span % READ_CHUNK_SIZE)
-	out_chunkLen = SEQUENTIAL_CHUNK;
+		return offset / READ_CHUNK_SIZE * READ_CHUNK_SIZE;
 	return span * (offset / span); // index direct access boundaries
 }
 int GzippedFileReader::_ReadSync(void* pBuffer, PX_off_t offset, uint bytesToRead) {
 	if (!OkIndex())
 		return -1;
-	//Make sure the request is inside a single optimal span, or split it otherwise to make it so
+	// Without all the caching, chunking and states, this would be enough:
-	PX_off_t chunkStart; int chunkLen;
+	// return extract(m_src, m_pIndex, offset, (unsigned char*)pBuffer, bytesToRead);
-	GetOptimalChunkForExtraction(offset, chunkStart, chunkLen);
+
-	uint maxInChunk = chunkStart + chunkLen - offset;
+	// Split request to READ_CHUNK_SIZE chunks at READ_CHUNK_SIZE boundaries
 	uint maxInChunk = READ_CHUNK_SIZE - offset % READ_CHUNK_SIZE;
 	if (bytesToRead > maxInChunk) {
-		int res1 = _ReadSync(pBuffer, offset, maxInChunk);
+		int first = _ReadSync(pBuffer, offset, maxInChunk);
-		if (res1 != maxInChunk)
+		if (first != maxInChunk)
-			return res1; // failure or EOF
+			return first; // EOF or failure
-		int res2 = _ReadSync((char*)pBuffer + maxInChunk, offset + maxInChunk, bytesToRead - maxInChunk);
+		int rest = _ReadSync((char*)pBuffer + maxInChunk, offset + maxInChunk, bytesToRead - maxInChunk);
-		if (res2 < 0)
+		if (rest < 0)
-			return res2;
+			return rest;
-		return res1 + res2;
+		return first + rest;
 	}
-	// From here onwards it's guarenteed that the request is inside a single optimal extractable/cacheable span
+	// From here onwards it's guarenteed that the request is inside a single READ_CHUNK_SIZE boundaries
 	int res = m_cache.Read(pBuffer, offset, bytesToRead);
 	if (res >= 0)
 		return res;
-	// Not available from cache. Decompress a chunk with optimal boundaries
+	// Not available from cache. Decompress from optimal starting
-	// which contains the request.
+	// point in READ_CHUNK_SIZE chunks and cache each chunk.
 	char* chunk = (char*)malloc(chunkLen);
 	PTT s = NOW();
-	FILE* in = fopen(m_filename.ToUTF8(), "rb");
+	PX_off_t extractOffset = GetOptimalExtractionStart(offset); // guaranteed in READ_CHUNK_SIZE boundaries
-	res = extract(in, m_pIndex, chunkStart, (unsigned char*)chunk, chunkLen, &m_zstate);
+	int size = offset + maxInChunk - extractOffset;
-	fclose(in);
+	unsigned char* extracted = (unsigned char*)malloc(size);
 	int span = m_pIndex->span;
 	int spanix = extractOffset / span;
 	res = extract(m_src, m_pIndex, extractOffset, extracted, size, &(m_zstates[spanix].state));
 	if (res < 0) {
 		free(extracted);
 		return res;
 	}
 	int copied = ChunksCache::CopyAvailable(extracted, extractOffset, res, pBuffer, offset, bytesToRead);
 	if (m_zstates[spanix].state.isValid && (extractOffset + res) / span != offset / span) {
 		// The state no longer matches this span.
 		// move the state to the appropriate span because it will be faster than using the index
 		int targetix = (extractOffset + res) / span;
 		m_zstates[targetix].Kill();
 		m_zstates[targetix] = m_zstates[spanix]; // We have elements for the entire file, and another one.
 		m_zstates[spanix].state.isValid = 0; // Not killing because we need the state.
 	}
 	if (size <= READ_CHUNK_SIZE)
 		m_cache.Take(extracted, extractOffset, res, size);
 	else { // split into cacheable chunks
 		for (int i = 0; i < size; i += READ_CHUNK_SIZE) {
 			int available = CLAMP(res - i, 0, READ_CHUNK_SIZE);
 			void* chunk = available ? malloc(available) : 0;
 			if (available)
 				memcpy(chunk, extracted + i, available);
 			m_cache.Take(chunk, extractOffset + i, available, std::min(size - i, READ_CHUNK_SIZE));
 		}
 		free(extracted);
 	}
 	int duration = NOW() - s;
 	if (duration > 10)
-		Console.WriteLn(Color_Gray, "gunzip: %1.2f MB - %d ms", (float)(chunkLen) / 1024 / 1024, duration);
+		Console.WriteLn(Color_Gray, "gunzip: chunk #%5d-%2d : %1.2f MB - %d ms",
 			(int)(offset / 4 / 1024 / 1024),
 			(int)(offset % (4 * 1024 * 1024) / READ_CHUNK_SIZE),
 			(float)size / 1024 / 1024,
 			duration);
-	if (res < 0)
+	return copied;
 		return res;
 	int available = ChunksCache::CopyAvailable(chunk, chunkStart, res, pBuffer, offset, bytesToRead);
 	m_cache.Take(chunk, chunkStart, res, chunkLen);
 	return available;
 }
 void GzippedFileReader::Close() {
@ -380,9 +445,12 @@ void GzippedFileReader::Close() {
 		m_pIndex = 0;
 	}
-	if (m_zstate.isValid) {
+	InitZstates(); // results in delete because no index
-		(void)inflateEnd(&m_zstate.strm);
+	m_cache.Clear();
-		m_zstate.isValid = false;
+
 	if (m_src) {
 		fclose(m_src);
 		m_src = 0;
 	}
 }