From 49cfc3424b1366778d9a3c527ffd0dfe4b26aa92 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Wed, 15 Apr 2015 09:01:29 -0700 Subject: [PATCH 1/3] Keep around z_stream between decompression passes. Slightly more efficient to avoid reallocating the state for each block. --- pcsx2/CDVD/CsoFileReader.cpp | 61 +++++++++++++++++++----------------- pcsx2/CDVD/CsoFileReader.h | 8 +++-- 2 files changed, 39 insertions(+), 30 deletions(-) diff --git a/pcsx2/CDVD/CsoFileReader.cpp b/pcsx2/CDVD/CsoFileReader.cpp index 44f8ca5b1e..0f118202b0 100644 --- a/pcsx2/CDVD/CsoFileReader.cpp +++ b/pcsx2/CDVD/CsoFileReader.cpp @@ -140,6 +140,15 @@ bool CsoFileReader::InitializeBuffers() { return false; } + m_z_stream = new z_stream; + m_z_stream->zalloc = Z_NULL; + m_z_stream->zfree = Z_NULL; + m_z_stream->opaque = Z_NULL; + if (inflateInit2(m_z_stream, -15) != Z_OK) { + Console.Error("Unable to initialize zlib for CSO decompression."); + return false; + } + return true; } @@ -148,20 +157,24 @@ void CsoFileReader::Close() { if (m_src) { fclose(m_src); - m_src = 0; + m_src = NULL; + } + if (m_z_stream) { + inflateEnd(m_z_stream); + m_z_stream = NULL; } if (m_readBuffer) { delete[] m_readBuffer; - m_readBuffer = 0; + m_readBuffer = NULL; } if (m_zlibBuffer) { delete[] m_zlibBuffer; - m_zlibBuffer = 0; + m_zlibBuffer = NULL; } if (m_index) { delete[] m_index; - m_index = 0; + m_index = NULL; } } @@ -238,41 +251,33 @@ int CsoFileReader::ReadFromFrame(u8 *dest, u64 pos, u64 maxBytes) { } bool CsoFileReader::DecompressFrame(u32 frame, u32 readBufferSize) { - z_stream z; - z.zalloc = Z_NULL; - z.zfree = Z_NULL; - z.opaque = Z_NULL; - if (inflateInit2(&z, -15) != Z_OK) { - Console.Error("Unable to initialize zlib for CSO decompression."); - return false; - } + m_z_stream->next_in = m_readBuffer; + m_z_stream->avail_in = readBufferSize; + m_z_stream->next_out = m_zlibBuffer; + m_z_stream->avail_out = m_frameSize; - z.next_in = m_readBuffer; - z.avail_in = readBufferSize; - z.next_out = m_zlibBuffer; - z.avail_out = m_frameSize; - - int status = inflate(&z, Z_FINISH); - if (status != Z_STREAM_END || z.total_out != m_frameSize) { - inflateEnd(&z); + int status = inflate(m_z_stream, Z_FINISH); + bool success = status == Z_STREAM_END && m_z_stream->total_out == m_frameSize; + if (success) { + // Our buffer now contains this frame. + m_zlibBufferFrame = frame; + } else { Console.Error("Unable to decompress CSO frame using zlib."); - return false; + m_zlibBufferFrame = (u32)-1; } - inflateEnd(&z); - // Our buffer now contains this frame. - m_zlibBufferFrame = frame; - return true; + inflateReset(m_z_stream); + return success; } void CsoFileReader::BeginRead(void* pBuffer, uint sector, uint count) { // TODO: No async support yet, implement as sync. - mBytesRead = ReadSync(pBuffer, sector, count); + m_bytesRead = ReadSync(pBuffer, sector, count); } int CsoFileReader::FinishRead() { - int res = mBytesRead; - mBytesRead = -1; + int res = m_bytesRead; + m_bytesRead = -1; return res; } diff --git a/pcsx2/CDVD/CsoFileReader.h b/pcsx2/CDVD/CsoFileReader.h index 2363053741..1eac7ae270 100644 --- a/pcsx2/CDVD/CsoFileReader.h +++ b/pcsx2/CDVD/CsoFileReader.h @@ -18,6 +18,7 @@ #include "AsyncFileReader.h" struct CsoHeader; +typedef struct z_stream_s z_stream; class CsoFileReader : public AsyncFileReader { @@ -28,7 +29,9 @@ public: m_zlibBuffer(0), m_index(0), m_totalSize(0), - m_src(0) { + m_src(0), + m_z_stream(0), + m_bytesRead(0) { m_blocksize = 2048; }; @@ -69,6 +72,7 @@ private: u64 m_totalSize; // The actual source cso file handle. FILE* m_src; + z_stream* m_z_stream; // The result of a read is stored here between BeginRead() and FinishRead(). - int mBytesRead; + int m_bytesRead; }; From 427fa039ba060a0320c90f0fc0d3165baa4739c9 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Wed, 15 Apr 2015 09:04:10 -0700 Subject: [PATCH 2/3] Note why multiple blocks aren't optimized in CSO. --- pcsx2/CDVD/CsoFileReader.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pcsx2/CDVD/CsoFileReader.cpp b/pcsx2/CDVD/CsoFileReader.cpp index 0f118202b0..70b7a7eb51 100644 --- a/pcsx2/CDVD/CsoFileReader.cpp +++ b/pcsx2/CDVD/CsoFileReader.cpp @@ -183,6 +183,9 @@ int CsoFileReader::ReadSync(void* pBuffer, uint sector, uint count) { return 0; } + // Note that, in practice, count will always be 1. It seems one sector is read + // per interrupt, even if multiple are requested by the application. + u8* dest = (u8*)pBuffer; // We do it this way in case m_blocksize is not well aligned to our frame size. u64 pos = (u64)sector * (u64)m_blocksize; From 7b1214849acba37fd66693359d1e3fced001ed78 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Wed, 15 Apr 2015 10:27:59 -0700 Subject: [PATCH 3/3] Add disabled code to use a cache for cso reading. It doesn't seem like the cache is worth it, unfortunately. --- pcsx2/CDVD/CsoFileReader.cpp | 32 +++++++++++++++++++++++++++----- pcsx2/CDVD/CsoFileReader.h | 22 +++++++++++++++++++++- 2 files changed, 48 insertions(+), 6 deletions(-) diff --git a/pcsx2/CDVD/CsoFileReader.cpp b/pcsx2/CDVD/CsoFileReader.cpp index 70b7a7eb51..87ebe82eda 100644 --- a/pcsx2/CDVD/CsoFileReader.cpp +++ b/pcsx2/CDVD/CsoFileReader.cpp @@ -154,6 +154,9 @@ bool CsoFileReader::InitializeBuffers() { void CsoFileReader::Close() { m_filename.Empty(); +#if CSO_USE_CHUNKSCACHE + m_cache.Clear(); +#endif if (m_src) { fclose(m_src); @@ -193,18 +196,37 @@ int CsoFileReader::ReadSync(void* pBuffer, uint sector, uint count) { int bytes = 0; while (remaining > 0) { - int readBytes = ReadFromFrame(dest + bytes, pos + bytes, remaining); - if (readBytes == 0) { - // We hit EOF. - break; + int readBytes; + +#if CSO_USE_CHUNKSCACHE + // Try first to read from the cache. + readBytes = m_cache.Read(dest + bytes, pos + bytes, remaining); +#else + readBytes = -1; +#endif + if (readBytes < 0) { + readBytes = ReadFromFrame(dest + bytes, pos + bytes, remaining); + if (readBytes == 0) { + // We hit EOF. + break; + } + +#if CSO_USE_CHUNKSCACHE + // Add the bytes into the cache. We need to allocate a buffer for it. + void *cached = malloc(readBytes); + memcpy(cached, dest + bytes, readBytes); + m_cache.Take(cached, pos + bytes, readBytes, readBytes); +#endif } + bytes += readBytes; remaining -= readBytes; } + return bytes; } -int CsoFileReader::ReadFromFrame(u8 *dest, u64 pos, u64 maxBytes) { +int CsoFileReader::ReadFromFrame(u8 *dest, u64 pos, int maxBytes) { if (pos >= m_totalSize) { // Can't read anything passed the end. return 0; diff --git a/pcsx2/CDVD/CsoFileReader.h b/pcsx2/CDVD/CsoFileReader.h index 1eac7ae270..04f5c19ab1 100644 --- a/pcsx2/CDVD/CsoFileReader.h +++ b/pcsx2/CDVD/CsoFileReader.h @@ -15,11 +15,23 @@ #pragma once +// Based on testing, the overhead of using this cache is high. +// +// The test was done with CSO files using a block size of 16KB. +// Cache hit rates were observed in the range of 25%. +// Cache overhead added 35% to the overall read time. +// +// For this reason, it's currently disabled. +#define CSO_USE_CHUNKSCACHE 0 + #include "AsyncFileReader.h" +#include "ChunksCache.h" struct CsoHeader; typedef struct z_stream_s z_stream; +static const uint CSO_CHUNKCACHE_SIZE_MB = 200; + class CsoFileReader : public AsyncFileReader { DeclareNoncopyableObject(CsoFileReader); @@ -31,6 +43,9 @@ public: m_totalSize(0), m_src(0), m_z_stream(0), +#if CSO_USE_CHUNKSCACHE + m_cache(CSO_CHUNKCACHE_SIZE_MB), +#endif m_bytesRead(0) { m_blocksize = 2048; }; @@ -59,7 +74,7 @@ private: static bool ValidateHeader(const CsoHeader& hdr); bool ReadFileHeader(); bool InitializeBuffers(); - int ReadFromFrame(u8 *dest, u64 pos, u64 maxBytes); + int ReadFromFrame(u8 *dest, u64 pos, int maxBytes); bool DecompressFrame(u32 frame, u32 readBufferSize); u32 m_frameSize; @@ -73,6 +88,11 @@ private: // The actual source cso file handle. FILE* m_src; z_stream* m_z_stream; + +#if CSO_USE_CHUNKSCACHE + ChunksCache m_cache; +#endif + // The result of a read is stored here between BeginRead() and FinishRead(). int m_bytesRead; };