/* PCSX2 - PS2 Emulator for PCs
 * Copyright (C) 2002-2021 PCSX2 Dev Team
 *
 * PCSX2 is free software: you can redistribute it and/or modify it under the terms
 * of the GNU Lesser General Public License as published by the Free Software Found-
 * ation, either version 3 of the License, or (at your option) any later version.
 *
 * PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 * PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with PCSX2.
 * If not, see <http://www.gnu.org/licenses/>.
 */

#include "PrecompiledHeader.h"
|
|
#include "GSRingHeap.h"
|
|
#include "GS.h"
|
|
#include "GSExtra.h"
|
|
|
|
namespace
|
|
{
|
|
/// Align `value` to `align` bytes
|
|
template <size_t align>
|
|
size_t alignTo(size_t value)
|
|
{
|
|
return ((value + (align - 1)) / align) * align;
|
|
}
|
|
|
|
/// Align to a power of 2 using a mask that's equal to that value - 1
|
|
size_t alignUsingMask(size_t align_mask, size_t value)
|
|
{
|
|
return (value + align_mask) & ~align_mask;
|
|
}
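
	// Worked example: for 64-byte alignment the mask is 63, so
	// alignUsingMask(63, 200) = (200 + 63) & ~63 = 256, the next multiple of 64
	// at or above 200 (already-aligned values are returned unchanged).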
} // namespace

/// GSRingHeap operates as a ring buffer, with usage counters for each quadrant.
/// If a new quadrant needs to be used but is still in use by existing allocations,
/// the buffer is orphaned and replaced with a new, larger buffer.
struct GSRingHeap::Buffer
{
	friend class GSRingHeap;

	static const size_t BEGINNING_OFFSET;

	static constexpr size_t USAGE_ARR_SIZE = sizeof(uint64_t) / sizeof(size_t);
	static constexpr size_t USAGE_ARR_ELEMS_PER_ENTRY = sizeof(size_t) / sizeof(uint16_t);

	/// Refcount, main heap holds onto buffer with a +1
	/// Each allocation adds `sizeof(allocation)`, to allow for detection of buffers being used inefficiently
	/// (e.g. if a buffer is orphaned with very little usage, that means allocations aren't being freed in a similar order to being made)
	/// Buffer is freed when the main heap drops it (-1) and every allocation is freed, causing this to reach 0
	std::atomic<size_t> m_amt_allocated;
	/// Holds 4x 16-bit usage counters, indicating how many allocations have been made from the nth quadrant of memory
	/// Merged into `size_t` chunks so that they can be operated on with fewer atomic operations
	std::atomic<size_t> m_usage[USAGE_ARR_SIZE];
	/// Size of whole buffer (including header)
	/// Should be kept to at least 2x the largest allocation
	size_t m_size;
	/// Offset of new allocations
	size_t m_write_loc;
	/// Amount to rshift a buffer offset to get which quadrant it's in (`log2(m_size/4)`)
	int m_quadrant_shift;

	/// Increment usage counts (use when allocating)
	void beginUse(uint64_t usage)
	{
		for (size_t i = 0; i < USAGE_ARR_SIZE; i++)
		{
			size_t piece = static_cast<size_t>(usage >> (i * (64 / USAGE_ARR_SIZE)));
			size_t prev = m_usage[i].fetch_add(piece, std::memory_order_relaxed);
			for (size_t j = 0; j < USAGE_ARR_ELEMS_PER_ENTRY; j++)
			{
				[[maybe_unused]] uint16_t section = prev >> (j * 16);
				assert(section != UINT16_MAX && "Usage count overflow");
			}
		}
	}
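
	// Layout note: on a 64-bit build USAGE_ARR_SIZE is 1, so all four 16-bit
	// quadrant counters live in a single size_t and one fetch_add updates every
	// counter selected by `usage`; on a 32-bit build they are split across two
	// size_t entries of two counters each.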

	/// Decrement usage counts (use when freeing)
	void endUse(uint64_t usage)
	{
		for (size_t i = 0; i < USAGE_ARR_SIZE; i++)
		{
			size_t piece = static_cast<size_t>(usage >> (i * (64 / USAGE_ARR_SIZE)));
			m_usage[i].fetch_sub(piece, std::memory_order_release);
		}
	}

	/// Check if the given quadrant is still in use
	bool isStillInUse(uint32_t quadrant)
	{
		int arridx = (quadrant / USAGE_ARR_ELEMS_PER_ENTRY) % USAGE_ARR_SIZE;
		int shift = (quadrant % USAGE_ARR_ELEMS_PER_ENTRY) * 16;
		return ((m_usage[arridx].load(std::memory_order_acquire) >> shift) & 0xFFFF) != 0;
	}

	uint32_t quadrant(size_t off)
	{
		return static_cast<uint32_t>(off >> m_quadrant_shift);
	}
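
	// e.g. a buffer created with quadrant_shift = 14 is 4 << 14 = 64 KiB, so each
	// quadrant is 16 KiB and an offset of 40000 maps to 40000 >> 14 = quadrant 2.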

	/// Calculate a usage mask from an offset + size
	uint64_t usageMask(size_t begin_off, size_t size)
	{
		// We guarantee size <= two quadrants
		// Therefore we only need to check beginning, middle, and end
		uint64_t mask = 0;
		mask |= 1ull << (quadrant(begin_off) * 16);
		size_t mid_off = begin_off + size / 2;
		mask |= 1ull << (quadrant(mid_off) * 16);
		size_t end_off = begin_off + size - 1;
		mask |= 1ull << (quadrant(end_off) * 16);
		return mask;
	}
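
	// Since an allocation spans at most two quadrants' worth of bytes, it can
	// touch at most three consecutive quadrants, and begin/middle/end are
	// guaranteed to hit each of them. E.g. in a 64 KiB buffer, begin_off = 15 KiB
	// with size = 20 KiB sets bits for quadrant 0 (begin), quadrant 1 (middle at
	// 25 KiB) and quadrant 2 (end just below 35 KiB).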

	/// Decrement the main amt_allocated refcount
	void decref(size_t amt)
	{
		if (unlikely(m_amt_allocated.fetch_sub(amt, std::memory_order_release) == amt))
		{
			std::atomic_thread_fence(std::memory_order_acquire);
			vmfree(this, m_size);
		}
	}
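
	// Standard last-reference pattern: if the fetch_sub returned exactly `amt`,
	// this call dropped the count to zero, and the acquire fence pairs with the
	// release decrements from other threads before the memory is unmapped.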

	/// Free an allocation
	void free(void* allocation, size_t size)
	{
		const char* base = reinterpret_cast<const char*>(this);
		size_t begin_off = static_cast<const char*>(allocation) - base;
		endUse(usageMask(begin_off, size));
		decref(size);
	}

	/// Allocate a value of `size` bytes with `prefix_size` bytes before it (for allocation tracking) and alignment specified by `align_mask`
	void* alloc(size_t size, size_t align_mask, size_t prefix_size)
	{
		uint32_t prev_quadrant = quadrant(m_write_loc - 1);
		size_t base_off = alignUsingMask(align_mask, m_write_loc + prefix_size);
		uint64_t usage_mask = 1ull << (quadrant(base_off - prefix_size) * 16);
		uint32_t new_quadrant = quadrant(base_off + size - 1);
		if (prev_quadrant != new_quadrant)
		{
			uint32_t cur_quadrant = prev_quadrant + 1;
			if (new_quadrant >= 4)
			{
				cur_quadrant = 0;
				usage_mask = 0;
				base_off = alignUsingMask(align_mask, BEGINNING_OFFSET + prefix_size);
				new_quadrant = quadrant(base_off + size - 1);
			}
			do
			{
				usage_mask |= 1ull << (cur_quadrant * 16);
				if (unlikely(isStillInUse(cur_quadrant)))
					return nullptr;
			} while (++cur_quadrant <= new_quadrant);
		}

		m_write_loc = base_off + size;
		beginUse(usage_mask);
		m_amt_allocated.fetch_add(size + prefix_size, std::memory_order_relaxed);
		return reinterpret_cast<char*>(this) + base_off - prefix_size;
	}
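
	// Example of the wrap case: with the write head in quadrant 3, an allocation
	// whose end would land past quadrant 3 (new_quadrant >= 4) restarts at
	// BEGINNING_OFFSET; the loop then verifies every quadrant the restarted
	// allocation touches is no longer referenced, returning nullptr (which makes
	// the caller orphan this buffer and allocate a new one) if any still is.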

	static Buffer* make(int quadrant_shift)
	{
		size_t size = 4ull << quadrant_shift;
		Buffer* buffer = reinterpret_cast<Buffer*>(vmalloc(size, false));
		buffer->m_size = size;
		buffer->m_quadrant_shift = quadrant_shift;
		buffer->m_amt_allocated.store(1, std::memory_order_relaxed);
		for (std::atomic<size_t>& usage : buffer->m_usage)
			usage.store(0, std::memory_order_relaxed);
		buffer->m_write_loc = BEGINNING_OFFSET;
		return buffer;
	}
};

const size_t GSRingHeap::Buffer::BEGINNING_OFFSET = alignTo<64>(sizeof(Buffer));
constexpr size_t GSRingHeap::MIN_ALIGN;

GSRingHeap::GSRingHeap()
{
	m_current_buffer = Buffer::make(14); // Start with 64k buffer
}

GSRingHeap::~GSRingHeap() noexcept
{
	orphanBuffer();
}

void GSRingHeap::orphanBuffer() noexcept
{
	m_current_buffer->decref(1);
}

void* GSRingHeap::alloc_internal(size_t size, size_t align_mask, size_t prefix_size)
{
	prefix_size += sizeof(Buffer*); // Add space for a pointer to the buffer
	size_t total_size = size + prefix_size;

	if (likely(total_size <= (m_current_buffer->m_size / 2)))
	{
		if (void* ptr = m_current_buffer->alloc(size, align_mask, prefix_size))
		{
			Buffer** bptr = static_cast<Buffer**>(ptr);
			*bptr = m_current_buffer;
			return bptr + 1;
		}
		else if (IsDevBuild)
		{
			size_t total = m_current_buffer->m_size;
			size_t mb = 1024 * 1024;
			if (total >= mb)
			{
				size_t used = m_current_buffer->m_amt_allocated.load(std::memory_order_relaxed) - 1;
				if (used * 4 < total)
				{
					fprintf(stderr, "GSRingHeap: Orphaning %zdmb buffer with low usage of %d%%, check that allocations are actually being deallocated approximately in order\n", total / mb, static_cast<int>((used * 100) / total));
				}
			}
		}
	}

	// Couldn't allocate, orphan buffer and make a new one
	int shift = m_current_buffer->m_quadrant_shift;
	do
	{
		shift++;
	} while (total_size > (2ull << shift));

	if (shift > 24 && total_size <= (2ull << (shift - 1)))
	{
		// If this needs to be >64 mb, we're doing something wrong
		fprintf(stderr, "GSRingHeap: Refusing to grow to %umb\n", 4u << (shift - 20));
		shift--;
	}
	Buffer* new_buffer = Buffer::make(shift);
	orphanBuffer();
	m_current_buffer = new_buffer;
	void* ptr = m_current_buffer->alloc(size, align_mask, prefix_size);
	assert(ptr && "Fresh buffer failed to allocate!");

	Buffer** bptr = static_cast<Buffer**>(ptr);
	*bptr = m_current_buffer;
	return bptr + 1;
}
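
// Growth note: the loop above picks the smallest larger shift whose buffer
// (4 << shift bytes) keeps total_size within half the buffer, e.g. the initial
// 64 KiB buffer (shift 14) grows to 128 KiB at shift 15. The shift > 24 check
// steps back one size when the allocation would also have fit in the next
// smaller buffer, since needing more than 64 MiB suggests allocations are not
// being freed roughly in order.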

void GSRingHeap::free_internal(void* ptr, size_t size) noexcept
{
	size += sizeof(Buffer*);
	Buffer** bptr = static_cast<Buffer**>(ptr) - 1;
	(*bptr)->free(bptr, size);
}
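
// Allocation layout (as set up in alloc_internal): the owning Buffer* is stored
// immediately before the pointer handed back to the caller, so free_internal can
// step back one pointer to recover both the buffer and the true start of the
// allocation without any extra lookup.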