rsx: Minor refactoring of RSXThread

- Part 1 of many
This commit is contained in:
kd-11 2023-01-07 19:20:21 +03:00 committed by kd-11
parent 659ee81e80
commit 3dba894369
22 changed files with 637 additions and 525 deletions

View File

@ -9,7 +9,7 @@
#include "Emu/perf_meter.hpp"
#include "Emu/Memory/vm_reservation.h"
#include "Emu/Memory/vm_locking.h"
#include "Emu/RSX/RSXThread.h"
#include "Emu/RSX/Core/RSXReservationLock.hpp"
#include "Emu/VFS.h"
#include "Emu/system_progress.hpp"
#include "Emu/system_utils.hpp"

View File

@ -9,7 +9,6 @@
#include "Emu/VFS.h"
#include "Emu/IdManager.h"
#include "Emu/perf_meter.hpp"
#include "Emu/RSX/RSXThread.h"
#include "Emu/Cell/PPUThread.h"
#include "Emu/Cell/ErrorCodes.h"
#include "Emu/Cell/lv2/sys_spu.h"
@ -23,6 +22,9 @@
#include "Emu/Cell/SPURecompiler.h"
#include "Emu/Cell/timers.hpp"
#include "Emu/RSX/Core/RSXReservationLock.hpp"
#include "Emu/RSX/RSXThread.h"
#include <cmath>
#include <cfenv>
#include <thread>

View File

@ -5,6 +5,8 @@
#include "Emu/Cell/ErrorCodes.h"
#include "Emu/Cell/timers.hpp"
#include "Emu/Memory/vm_locking.h"
#include "Emu/RSX/Core/RSXEngLock.hpp"
#include "Emu/RSX/Core/RSXReservationLock.hpp"
#include "Emu/RSX/RSXThread.h"
#include "util/asm.hpp"
#include "sys_event.h"

View File

@ -0,0 +1,73 @@
#pragma once
#include <util/types.hpp>
#include <util/logs.hpp>
#include <deque>
namespace rsx
{
// Plain aggregate of per-frame counters and timing accumulators.
// It is embedded in display_flip_info_t as the `stats` member.
// NOTE(review): the unit of the *_time fields is not visible in this header - presumably microseconds; confirm where they are filled in.
struct frame_statistics_t
{
u32 draw_calls;           // Draw call counter (as named)
u32 submit_count;         // Submission counter (as named)
s64 setup_time;           // Accumulated setup time
s64 vertex_upload_time;   // Accumulated vertex upload time
s64 textures_upload_time; // Accumulated texture upload time
s64 draw_exec_time;       // Accumulated draw execution time
s64 flip_time;            // Accumulated flip time
};
struct display_flip_info_t
{
std::deque<u32> buffer_queue;
u32 buffer;
bool skip_frame;
bool emu_flip;
bool in_progress;
frame_statistics_t stats;
inline void push(u32 _buffer)
{
buffer_queue.push_back(_buffer);
}
inline bool pop(u32 _buffer)
{
if (buffer_queue.empty())
{
return false;
}
do
{
const auto index = buffer_queue.front();
buffer_queue.pop_front();
if (index == _buffer)
{
buffer = _buffer;
return true;
}
} while (!buffer_queue.empty());
// Need to observe this happening in the wild
rsx_log.error("Display queue was discarded while not empty!");
return false;
}
};
// Non-copyable holder for a shared reference to a named_thread that runs a std::function<void()>.
// The member functions are only declared here; their definitions live elsewhere.
class vblank_thread
{
std::shared_ptr<named_thread<std::function<void()>>> m_thread;
public:
vblank_thread() = default;
vblank_thread(const vblank_thread&) = delete;
// Hand over the thread object to hold (taken by value).
void set_thread(std::shared_ptr<named_thread<std::function<void()>>> thread);
// Assignment from a thread_state value.
// NOTE(review): presumably forwards the state to the held thread - confirm in the definition.
vblank_thread& operator=(thread_state);
vblank_thread& operator=(const vblank_thread&) = delete;
};
}

View File

@ -0,0 +1,31 @@
#pragma once
#include <util/types.hpp>
#include "../RSXThread.h"
namespace rsx
{
// Scoped guard around rsx::thread::pause() / unpause().
//
// On construction, if the caller is NOT the target thread itself, the target
// is paused and will be unpaused again when the guard is destroyed. If the
// caller IS the target thread, nothing is paused and the destructor is a no-op.
//
// `target` is dereferenced unconditionally and therefore must not be null.
class eng_lock
{
	// Thread to unpause on destruction; nullptr when no pause was issued
	rsx::thread* pthr;

public:
	eng_lock(rsx::thread* target)
		: pthr(target)
	{
		if (pthr->is_current_thread())
		{
			// Called from the target thread itself: do not pause, and make the destructor skip unpause()
			pthr = nullptr;
		}
		else
		{
			pthr->pause();
		}
	}

	// A copy would issue a second unpause() for a single pause(), so the guard is non-copyable
	eng_lock(const eng_lock&) = delete;
	eng_lock& operator=(const eng_lock&) = delete;

	~eng_lock()
	{
		if (pthr) pthr->unpause();
	}
};
}

View File

@ -0,0 +1,42 @@
#pragma once
#include <util/types.hpp>
#include "../gcm_enums.h"
#include "../GCM.h"
namespace rsx
{
// Describes a region of memory together with an optional GcmTileInfo, and offers
// read/write helpers (declared here, defined elsewhere).
// NOTE(review): the exact meaning of `address` vs `base` is not visible in this header - confirm against the code that builds this struct.
struct tiled_region
{
u32 address;
u32 base;
GcmTileInfo* tile; // Tile descriptor (non-owning pointer as far as this header shows)
u8* ptr;           // Raw pointer to the region's bytes
// Copy an image of `width` x `height` with row pitch `pitch` from `src` into the region.
void write(const void* src, u32 width, u32 height, u32 pitch);
// Copy an image of `width` x `height` with row pitch `pitch` from the region into `dst`.
void read(void* dst, u32 width, u32 height, u32 pitch);
};
// Snapshot of a framebuffer configuration: dimensions, up to four color targets,
// one depth ("zeta") target and the associated surface format settings.
// The struct opts into bitwise serialization via ENABLE_BITWISE_SERIALIZATION,
// so member order and types should be treated as part of its contract.
struct framebuffer_layout
{
ENABLE_BITWISE_SERIALIZATION;
u16 width;
u16 height;
// Per color target values (4 slots each)
std::array<u32, 4> color_addresses;
std::array<u32, 4> color_pitch;
std::array<u32, 4> actual_color_pitch;
std::array<bool, 4> color_write_enabled;
// Depth target values
u32 zeta_address;
u32 zeta_pitch;
u32 actual_zeta_pitch;
bool zeta_write_enabled;
// Surface description
rsx::surface_target target;
rsx::surface_color_format color_format;
rsx::surface_depth_format2 depth_format;
rsx::surface_antialiasing aa_mode;
rsx::surface_raster_type raster_type;
u32 aa_factors[2]; // NOTE(review): presumably horizontal/vertical scaling derived from aa_mode - confirm
bool ignore_change;
};
}

View File

@ -0,0 +1,86 @@
#pragma once
#include <util/types.hpp>
#include "Utilities/mutex.h"
#include "Emu/CPU/CPUThread.h"
namespace rsx
{
// Address translation tables plus a table of shared mutexes used to lock
// address ranges at a granularity of c_lock_stride bytes.
struct rsx_iomap_table
{
	// Number of bytes of address space covered by one entry of `rs`
	static constexpr u32 c_lock_stride = 8192;

	std::array<atomic_t<u32>, 4096> ea; // Indexed by (offset >> 20) in get_addr()
	std::array<atomic_t<u32>, 4096> io; // NOTE(review): presumably the reverse mapping of `ea` - confirm
	std::array<shared_mutex, 0x1'0000'0000 / c_lock_stride> rs; // One mutex per c_lock_stride bytes of the 32-bit address space

	rsx_iomap_table() noexcept;

	// Try to get the real address given a mapped address
	// Returns -1 on failure
	// NOTE(review): the -1 result relies on unmapped `ea` entries holding all-ones, so that the OR below yields -1 - confirm in the constructor
	u32 get_addr(u32 offs) const noexcept
	{
		return this->ea[offs >> 20] | (offs & 0xFFFFF);
	}

	// Lock the mutexes of the blocks covering [addr, addr + len - 1], visiting
	// every Stride-th block starting from the block that contains `addr`.
	// IsFullLock selects exclusive (lock) vs shared (lock_shared) acquisition.
	//
	// Returns false without locking anything when len <= 1, true otherwise.
	//
	// If a mutex cannot be taken immediately and `self` is provided, cpu_flag::wait
	// is set on it. A thread whose id_type() is 0x55 polls with cpu_wait() instead
	// of blocking on the mutex; every other caller blocks. When this call was the
	// one that set the wait flag, check_state() is invoked on `self` before returning.
	template <bool IsFullLock, uint Stride>
	bool lock(u32 addr, u32 len, cpu_thread* self = nullptr) noexcept
	{
		if (len <= 1) return false;
		const u32 end = addr + len - 1;

		bool added_wait = false;

		for (u32 block = addr / c_lock_stride; block <= (end / c_lock_stride); block += Stride)
		{
			auto& mutex_ = rs[block];

			if (IsFullLock ? !mutex_.try_lock() : !mutex_.try_lock_shared()) [[ unlikely ]]
			{
				if (self)
				{
					added_wait |= !self->state.test_and_set(cpu_flag::wait);
				}

				if (!self || self->id_type() != 0x55u)
				{
					IsFullLock ? mutex_.lock() : mutex_.lock_shared();
				}
				else
				{
					while (IsFullLock ? !mutex_.try_lock() : !mutex_.try_lock_shared())
					{
						self->cpu_wait({});
					}
				}
			}
		}

		if (added_wait)
		{
			self->check_state();
		}

		return true;
	}

	// Release the mutexes taken by a matching lock<IsFullLock, Stride>(addr, len) call.
	// The block walk must mirror lock() exactly, hence both use c_lock_stride.
	template <bool IsFullLock, uint Stride>
	void unlock(u32 addr, u32 len) noexcept
	{
		ensure(len >= 1);
		const u32 end = addr + len - 1;

		for (u32 block = addr / c_lock_stride; block <= (end / c_lock_stride); block += Stride)
		{
			if constexpr (IsFullLock)
			{
				rs[block].unlock();
			}
			else
			{
				rs[block].unlock_shared();
			}
		}
	}
};
}

View File

@ -0,0 +1,106 @@
#pragma once
#include <util/types.hpp>
#include "../RSXThread.h"
namespace rsx
{
// Scoped lock over a range of the renderer's iomap_table block mutexes.
// IsFullLock and Stride are forwarded to rsx_iomap_table::lock/unlock.
// A stored length of 0 means "nothing is currently locked".
template<bool IsFullLock = false, uint Stride = 128>
class reservation_lock
{
	u32 addr = 0;
	u32 length = 0;

	// Try to lock [start, start + size) and remember the range.
	// If the table refuses the request, the remembered length is 0.
	inline void lock_range(u32 start, u32 size)
	{
		const bool acquired = get_current_renderer()->iomap_table.lock<IsFullLock, Stride>(start, size, get_current_cpu_thread());

		addr = start;
		length = acquired ? size : 0;
	}

public:
	// Lock the range only when accurate reservation access is enabled in the
	// config and the address lies below local memory.
	reservation_lock(u32 addr, u32 length)
	{
		if (!g_cfg.core.rsx_accurate_res_access || addr >= constants::local_mem_base)
		{
			return;
		}

		lock_range(addr, length);
	}

	// Lock the range if and only if `setting` is true (no config or address check).
	reservation_lock(u32 addr, u32 length, bool setting)
	{
		if (!setting)
		{
			return;
		}

		lock_range(addr, length);
	}

	// Multi-range lock. If ranges overlap, the combined range will be acquired.
	// If ranges do not overlap, the first range that is in main memory will be acquired.
	reservation_lock(u32 dst_addr, u32 dst_length, u32 src_addr, u32 src_length)
	{
		if (!g_cfg.core.rsx_accurate_res_access)
		{
			return;
		}

		const auto dst_range = utils::address_range::start_length(dst_addr, dst_length);
		const auto src_range = utils::address_range::start_length(src_addr, src_length);

		utils::address_range selected;

		if (dst_range.overlaps(src_range)) [[unlikely]]
		{
			// Very unlikely
			selected = dst_range.get_min_max(src_range);
		}
		else
		{
			selected = (dst_addr < constants::local_mem_base) ? dst_range : src_range;
		}

		if (selected.start >= constants::local_mem_base)
		{
			return;
		}

		lock_range(selected.start, selected.length());
	}

	// Very special utility for batched transfers (SPU related)
	// Re-targets an already held lock to a new range. Does nothing when no lock
	// is held or when _length <= 1.
	template <typename T = void>
	void update_if_enabled(u32 addr, u32 _length, const std::add_pointer_t<T>& lock_release = std::add_pointer_t<void>{})
	{
		if (!length || _length <= 1)
		{
			return;
		}

		// This check is not perfect but it covers the important cases fast (this check is only an optimization - forcing true disables it)
		constexpr u32 stride = rsx_iomap_table::c_lock_stride;
		const bool starts_in_other_block = (this->addr / stride) != (addr / stride);
		const bool crosses_block_end = (addr % stride + _length) > stride;

		if (!starts_in_other_block && !crosses_block_end)
		{
			return;
		}

		if constexpr (!std::is_void_v<T>)
		{
			// See SPUThread.cpp
			lock_release->release(0);
		}

		unlock();
		lock_range(addr, _length);
	}

	// Release the held range, if any. When called from the destructor the
	// stored length is left untouched since the object is going away.
	void unlock(bool destructor = false)
	{
		if (!length)
		{
			return;
		}

		get_current_renderer()->iomap_table.unlock<IsFullLock, Stride>(addr, length);

		if (!destructor)
		{
			length = 0;
		}
	}

	~reservation_lock()
	{
		unlock(true);
	}
};
}

View File

@ -0,0 +1,168 @@
#pragma once
#include <util/types.hpp>
#include "../Common/simple_array.hpp"
#include "../gcm_enums.h"
#include <span>
namespace rsx
{
// Describes a vertex attribute whose data is a non-owning view (std::span) over a byte buffer.
struct vertex_array_buffer
{
rsx::vertex_base_type type;
u8 attribute_size;
u8 stride;
std::span<const std::byte> data; // Non-owning view; the underlying storage must outlive this struct
u8 index;
bool is_be; // NOTE(review): presumably "data is big-endian" - confirm
};
// Describes a vertex attribute whose value is stored inline as four 32-bit words.
struct vertex_array_register
{
rsx::vertex_base_type type;
u8 attribute_size;
std::array<u32, 4> data; // Inline attribute value
u8 index;
};
// Describes a vertex attribute that carries only an index and no data.
struct empty_vertex_array
{
u8 index;
};
// Tag-like type for a non-indexed array draw; it has no meaningful payload.
// NOTE(review): identifiers containing a double underscore are reserved in C++; kept as-is because renaming would change the public layout/names.
struct draw_array_command
{
u32 __dummy;
};
// Payload for an indexed draw: a non-owning view over the raw index buffer bytes.
struct draw_indexed_array_command
{
std::span<const std::byte> raw_index_buffer;
};
// Tag-like type for an inlined array draw; it has no meaningful payload.
// NOTE(review): identifiers containing a double underscore are reserved in C++; kept as-is because renaming would change the public layout/names.
struct draw_inlined_array
{
u32 __dummy;
u32 __dummy2;
};
// One attribute entry inside an interleaved block (see interleaved_range_info::locations).
// NOTE(review): `modulo` and `frequency` presumably describe attribute frequency division - confirm against the consumer.
struct interleaved_attribute_t
{
u8 index;
bool modulo;
u16 frequency;
};
// Description of one block of vertex attributes that share a stride and a base location.
// All scalar members default to zero/false; `locations` lists the attributes in the block.
struct interleaved_range_info
{
bool interleaved = false;
bool single_vertex = false;
u32 base_offset = 0;
u32 real_offset_address = 0;
u8 memory_location = 0;
u8 attribute_stride = 0;
rsx::simple_array<interleaved_attribute_t> locations;
// Check if we need to upload a full unoptimized range, i.e [0-max_index]
// Declared here, defined elsewhere. The `second` element of the result is used as an element
// count by vertex_input_layout::calculate_interleaved_memory_requirements.
std::pair<u32, u32> calculate_required_range(u32 first, u32 count) const;
};
// Where the data of a vertex attribute is placed.
// Unscoped enum with a u8 underlying type; the enumerators are visible in the enclosing namespace.
enum attribute_buffer_placement : u8
{
none = 0,       // Attribute is not used
persistent = 1,
transient = 2   // May also be a register reference, see vertex_input_layout::validate
};
// Collects the vertex input sources: interleaved blocks (backed by a fixed pool
// of 16 entries), volatile blocks, referenced registers and the placement of
// each of the 16 attributes.
class vertex_input_layout
{
	int m_num_used_blocks = 0;
	std::array<interleaved_range_info, 16> m_blocks_data{};

public:
	rsx::simple_array<interleaved_range_info*> interleaved_blocks{};  // Interleaved blocks to be uploaded as-is
	std::vector<std::pair<u8, u32>> volatile_blocks{};                // Volatile data blocks (immediate draw vertex data for example)
	rsx::simple_array<u8> referenced_registers{};                     // Volatile register data
	std::array<attribute_buffer_placement, 16> attribute_placement = fill_array(attribute_buffer_placement::none);

	vertex_input_layout() = default;

	// Hand out the next entry of the internal pool, reset to its defaults
	// except that `interleaved` is set to true.
	interleaved_range_info* alloc_interleaved_block()
	{
		interleaved_range_info& block = m_blocks_data[m_num_used_blocks++];

		block.interleaved = true;
		block.single_vertex = false;
		block.base_offset = 0;
		block.real_offset_address = 0;
		block.memory_location = 0;
		block.attribute_stride = 0;
		block.locations.clear();

		return &block;
	}

	// Forget all allocated blocks and collected sources.
	// attribute_placement is not touched here.
	void clear()
	{
		referenced_registers.clear();
		volatile_blocks.clear();
		interleaved_blocks.clear();
		m_num_used_blocks = 0;
	}

	// Criteria: At least one array stream has to be defined to feed vertex positions
	// This stream cannot be a const register as the vertices cannot create a zero-area primitive
	bool validate() const
	{
		const bool has_strided_block = !interleaved_blocks.empty() && interleaved_blocks[0]->attribute_stride != 0;

		if (has_strided_block || !volatile_blocks.empty())
		{
			return true;
		}

		for (u8 index = 0; index < limits::vertex_count; ++index)
		{
			const auto placement = attribute_placement[index];

			if (placement == attribute_buffer_placement::none)
			{
				continue;
			}

			if (placement == attribute_buffer_placement::persistent)
			{
				return true;
			}

			if (placement != attribute_buffer_placement::transient)
			{
				fmt::throw_exception("Unreachable");
			}

			// Transient: a register reference does not count,
			// otherwise the source is inline array or immediate draw push buffer
			const bool is_register_reference =
				std::find(referenced_registers.begin(), referenced_registers.end(), index) != referenced_registers.end();

			if (!is_register_reference)
			{
				return true;
			}
		}

		return false;
	}

	// Sum, over all interleaved blocks, of (required element count * block stride).
	u32 calculate_interleaved_memory_requirements(u32 first_vertex, u32 vertex_count) const
	{
		u32 total_bytes = 0;

		for (auto& entry : interleaved_blocks)
		{
			const auto required = entry->calculate_required_range(first_vertex, vertex_count);
			total_bytes += required.second * entry->attribute_stride;
		}

		return total_bytes;
	}
};
}

View File

@ -1037,11 +1037,11 @@ void GLGSRender::on_semaphore_acquire_wait()
if (!work_queue.empty() ||
(async_flip_requested & flip_request::emu_requested))
{
do_local_task(rsx::FIFO_state::lock_wait);
do_local_task(rsx::FIFO::state::lock_wait);
}
}
void GLGSRender::do_local_task(rsx::FIFO_state state)
void GLGSRender::do_local_task(rsx::FIFO::state state)
{
if (!work_queue.empty())
{
@ -1058,7 +1058,7 @@ void GLGSRender::do_local_task(rsx::FIFO_state state)
q.processed = true;
}
}
else if (!in_begin_end && state != rsx::FIFO_state::lock_wait)
else if (!in_begin_end && state != rsx::FIFO::state::lock_wait)
{
if (m_graphics_state & rsx::pipeline_state::framebuffer_reads_dirty)
{
@ -1071,7 +1071,7 @@ void GLGSRender::do_local_task(rsx::FIFO_state state)
rsx::thread::do_local_task(state);
if (state == rsx::FIFO_state::lock_wait)
if (state == rsx::FIFO::state::lock_wait)
{
// Critical check finished
return;

View File

@ -193,7 +193,7 @@ protected:
void on_exit() override;
void flip(const rsx::display_flip_info_t& info) override;
void do_local_task(rsx::FIFO_state state) override;
void do_local_task(rsx::FIFO::state state) override;
bool on_access_violation(u32 address, bool is_writing) override;
void on_invalidate_memory_range(const utils::address_range &range, rsx::invalidation_cause cause) override;

View File

@ -4,6 +4,7 @@
#include "RSXThread.h"
#include "Capture/rsx_capture.h"
#include "Common/time.hpp"
#include "Core/RSXReservationLock.hpp"
#include "Emu/Memory/vm_reservation.h"
#include "Emu/Cell/lv2/sys_rsx.h"
#include "util/asm.hpp"
@ -613,20 +614,20 @@ namespace rsx
{
case FIFO::FIFO_NOP:
{
if (performance_counters.state == FIFO_state::running)
if (performance_counters.state == FIFO::state::running)
{
performance_counters.FIFO_idle_timestamp = rsx::uclock();
performance_counters.state = FIFO_state::nop;
performance_counters.state = FIFO::state::nop;
}
return;
}
case FIFO::FIFO_EMPTY:
{
if (performance_counters.state == FIFO_state::running)
if (performance_counters.state == FIFO::state::running)
{
performance_counters.FIFO_idle_timestamp = rsx::uclock();
performance_counters.state = FIFO_state::empty;
performance_counters.state = FIFO::state::empty;
}
else
{
@ -658,13 +659,13 @@ namespace rsx
if (offs == fifo_ctrl->get_pos())
{
//Jump to self. Often preceded by NOP
if (performance_counters.state == FIFO_state::running)
if (performance_counters.state == FIFO::state::running)
{
performance_counters.FIFO_idle_timestamp = rsx::uclock();
sync_point_request.release(true);
}
performance_counters.state = FIFO_state::spinning;
performance_counters.state = FIFO::state::spinning;
}
else
{
@ -710,14 +711,14 @@ namespace rsx
}
if (const auto state = performance_counters.state;
state != FIFO_state::running)
state != FIFO::state::running)
{
performance_counters.state = FIFO_state::running;
performance_counters.state = FIFO::state::running;
// Hack: Delay FIFO wake-up according to setting
// NOTE: The typical spin setup is a NOP followed by a jump-to-self
// NOTE: There is a small delay when the jump address is dynamically edited by cell
if (state != FIFO_state::nop)
if (state != FIFO::state::nop)
{
fifo_wake_delay();
}

View File

@ -32,6 +32,22 @@ namespace rsx
EMIT_BARRIER = 2
};
// State of the FIFO puller, stored in the performance counters and passed to do_local_task()
enum class state : u8
{
running = 0,
empty = 1, // PUT == GET
spinning = 2, // Puller continuously jumps to self addr (synchronization technique)
nop = 3, // Puller is processing a NOP command
lock_wait = 4,// Puller is processing a lock acquire
paused = 5, // Puller is paused externally
};
// Reason passed to rsx::thread::sync_hint() to describe which upcoming sync point triggered the hint
enum class interrupt_hint : u8
{
conditional_render_eval = 1, // A conditional render evaluation is coming up
zcull_sync = 2               // A ZCULL query needs to be synchronized
};
struct register_pair
{
u32 reg;

View File

@ -2,6 +2,7 @@
#include "Emu/Memory/vm.h"
#include "Common/BufferUtils.h"
#include "Core/RSXReservationLock.hpp"
#include "RSXOffload.h"
#include "RSXThread.h"

View File

@ -5,12 +5,14 @@
#include "Emu/Cell/SPUThread.h"
#include "Emu/Cell/timers.hpp"
#include "Capture/rsx_capture.h"
#include "Common/BufferUtils.h"
#include "Common/buffer_stream.hpp"
#include "Common/texture_cache.h"
#include "Common/surface_store.h"
#include "Common/time.hpp"
#include "Capture/rsx_capture.h"
#include "Core/RSXReservationLock.hpp"
#include "Core/RSXEngLock.hpp"
#include "rsx_methods.h"
#include "gcm_printing.h"
#include "RSXDisAsm.h"
@ -733,7 +735,7 @@ namespace rsx
if ((state & (cpu_flag::dbg_global_pause + cpu_flag::exit)) == cpu_flag::dbg_global_pause)
{
// Wait 16ms during emulation pause. This reduces cpu load while still giving us the chance to render overlays.
do_local_task(rsx::FIFO_state::paused);
do_local_task(rsx::FIFO::state::paused);
thread_ctrl::wait_on(state, old, 16000);
}
else
@ -803,7 +805,7 @@ namespace rsx
check_zcull_status(false);
nv4097::set_render_mode(this, 0, method_registers.registers[NV4097_SET_RENDER_ENABLE]);
performance_counters.state = FIFO_state::empty;
performance_counters.state = FIFO::state::empty;
const u64 event_flags = unsent_gcm_events.exchange(0);
@ -832,7 +834,7 @@ namespace rsx
thread_ctrl::wait_for(1000);
}
performance_counters.state = FIFO_state::running;
performance_counters.state = FIFO::state::running;
fifo_ctrl = std::make_unique<::rsx::FIFO::FIFO_control>(this);
fifo_ctrl->set_get(ctrl->get);
@ -994,7 +996,7 @@ namespace rsx
// Clear any pending flush requests to release threads
std::this_thread::sleep_for(10ms);
do_local_task(rsx::FIFO_state::lock_wait);
do_local_task(rsx::FIFO::state::lock_wait);
g_fxo->get<rsx::dma_manager>().join();
g_fxo->get<vblank_thread>() = thread_state::finished;
@ -1261,7 +1263,7 @@ namespace rsx
fmt::throw_exception("ill-formed draw command");
}
void thread::do_local_task(FIFO_state state)
void thread::do_local_task(FIFO::state state)
{
m_eng_interrupt_mask.clear(rsx::backend_interrupt);
@ -1272,7 +1274,7 @@ namespace rsx
handle_emu_flip(async_flip_buffer);
}
if (!in_begin_end && state != FIFO_state::lock_wait)
if (!in_begin_end && state != FIFO::state::lock_wait)
{
if (atomic_storage<u32>::load(m_invalidated_memory_range.end) != 0)
{
@ -2845,7 +2847,7 @@ namespace rsx
if (!result.queries.empty())
{
cond_render_ctrl.set_eval_sources(result.queries);
sync_hint(FIFO_hint::hint_conditional_render_eval, { .query = cond_render_ctrl.eval_sources.front(), .address = ref });
sync_hint(FIFO::interrupt_hint::conditional_render_eval, { .query = cond_render_ctrl.eval_sources.front(), .address = ref });
}
else
{
@ -2895,7 +2897,7 @@ namespace rsx
//ensure(async_tasks_pending.load() == 0);
}
void thread::sync_hint(FIFO_hint /*hint*/, rsx::reports::sync_hint_payload_t payload)
void thread::sync_hint(FIFO::interrupt_hint /*hint*/, rsx::reports::sync_hint_payload_t payload)
{
zcull_ctrl->on_sync_hint(payload);
}

View File

@ -28,6 +28,11 @@
#include "Emu/IdManager.h"
#include "Emu/system_config.h"
#include "Core/RSXDisplay.h"
#include "Core/RSXFrameBuffer.h"
#include "Core/RSXIOMap.hpp"
#include "Core/RSXVertexTypes.h"
extern atomic_t<bool> g_user_asked_for_frame_capture;
extern atomic_t<bool> g_disable_frame_limit;
extern rsx::frame_trace_data frame_debug;
@ -40,84 +45,6 @@ namespace rsx
class display_manager;
}
struct rsx_iomap_table
{
static constexpr u32 c_lock_stride = 8192;
std::array<atomic_t<u32>, 4096> ea;
std::array<atomic_t<u32>, 4096> io;
std::array<shared_mutex, 0x1'0000'0000 / c_lock_stride> rs;
rsx_iomap_table() noexcept;
// Try to get the real address given a mapped address
// Returns -1 on failure
u32 get_addr(u32 offs) const noexcept
{
return this->ea[offs >> 20] | (offs & 0xFFFFF);
}
template <bool IsFullLock, uint Stride>
bool lock(u32 addr, u32 len, cpu_thread* self = nullptr) noexcept
{
if (len <= 1) return false;
const u32 end = addr + len - 1;
bool added_wait = false;
for (u32 block = addr / c_lock_stride; block <= (end / c_lock_stride); block += Stride)
{
auto& mutex_ = rs[block];
if (IsFullLock ? !mutex_.try_lock() : !mutex_.try_lock_shared()) [[ unlikely ]]
{
if (self)
{
added_wait |= !self->state.test_and_set(cpu_flag::wait);
}
if (!self || self->id_type() != 0x55u)
{
IsFullLock ? mutex_.lock() : mutex_.lock_shared();
}
else
{
while (IsFullLock ? !mutex_.try_lock() : !mutex_.try_lock_shared())
{
self->cpu_wait({});
}
}
}
}
if (added_wait)
{
self->check_state();
}
return true;
}
template <bool IsFullLock, uint Stride>
void unlock(u32 addr, u32 len) noexcept
{
ensure(len >= 1);
const u32 end = addr + len - 1;
for (u32 block = (addr / 8192); block <= (end / 8192); block += Stride)
{
if constexpr (IsFullLock)
{
rs[block].unlock();
}
else
{
rs[block].unlock_shared();
}
}
}
};
enum framebuffer_creation_context : u8
{
context_draw = 0,
@ -175,22 +102,6 @@ namespace rsx
all_interrupt_bits = memory_config_interrupt | backend_interrupt | display_interrupt | pipe_flush_interrupt
};
enum FIFO_state : u8
{
running = 0,
empty = 1, // PUT == GET
spinning = 2, // Puller continuously jumps to self addr (synchronization technique)
nop = 3, // Puller is processing a NOP command
lock_wait = 4,// Puller is processing a lock acquire
paused = 5, // Puller is paused externallly
};
enum FIFO_hint : u8
{
hint_conditional_render_eval = 1,
hint_zcull_sync = 2
};
enum result_flags: u8
{
result_none = 0,
@ -206,264 +117,6 @@ namespace rsx
const char* file = __builtin_FILE(),
const char* func = __builtin_FUNCTION());
struct tiled_region
{
u32 address;
u32 base;
GcmTileInfo *tile;
u8 *ptr;
void write(const void *src, u32 width, u32 height, u32 pitch);
void read(void *dst, u32 width, u32 height, u32 pitch);
};
struct vertex_array_buffer
{
rsx::vertex_base_type type;
u8 attribute_size;
u8 stride;
std::span<const std::byte> data;
u8 index;
bool is_be;
};
struct vertex_array_register
{
rsx::vertex_base_type type;
u8 attribute_size;
std::array<u32, 4> data;
u8 index;
};
struct empty_vertex_array
{
u8 index;
};
struct draw_array_command
{
u32 __dummy;
};
struct draw_indexed_array_command
{
std::span<const std::byte> raw_index_buffer;
};
struct draw_inlined_array
{
u32 __dummy;
u32 __dummy2;
};
struct interleaved_attribute_t
{
u8 index;
bool modulo;
u16 frequency;
};
struct interleaved_range_info
{
bool interleaved = false;
bool single_vertex = false;
u32 base_offset = 0;
u32 real_offset_address = 0;
u8 memory_location = 0;
u8 attribute_stride = 0;
rsx::simple_array<interleaved_attribute_t> locations;
// Check if we need to upload a full unoptimized range, i.e [0-max_index]
std::pair<u32, u32> calculate_required_range(u32 first, u32 count) const;
};
enum attribute_buffer_placement : u8
{
none = 0,
persistent = 1,
transient = 2
};
class vertex_input_layout
{
int m_num_used_blocks = 0;
std::array<interleaved_range_info, 16> m_blocks_data{};
public:
rsx::simple_array<interleaved_range_info*> interleaved_blocks{}; // Interleaved blocks to be uploaded as-is
std::vector<std::pair<u8, u32>> volatile_blocks{}; // Volatile data blocks (immediate draw vertex data for example)
rsx::simple_array<u8> referenced_registers{}; // Volatile register data
std::array<attribute_buffer_placement, 16> attribute_placement = fill_array(attribute_buffer_placement::none);
vertex_input_layout() = default;
interleaved_range_info* alloc_interleaved_block()
{
auto result = &m_blocks_data[m_num_used_blocks++];
result->attribute_stride = 0;
result->base_offset = 0;
result->memory_location = 0;
result->real_offset_address = 0;
result->single_vertex = false;
result->locations.clear();
result->interleaved = true;
return result;
}
void clear()
{
m_num_used_blocks = 0;
interleaved_blocks.clear();
volatile_blocks.clear();
referenced_registers.clear();
}
bool validate() const
{
// Criteria: At least one array stream has to be defined to feed vertex positions
// This stream cannot be a const register as the vertices cannot create a zero-area primitive
if (!interleaved_blocks.empty() && interleaved_blocks[0]->attribute_stride != 0)
return true;
if (!volatile_blocks.empty())
return true;
for (u8 index = 0; index < limits::vertex_count; ++index)
{
switch (attribute_placement[index])
{
case attribute_buffer_placement::transient:
{
// Ignore register reference
if (std::find(referenced_registers.begin(), referenced_registers.end(), index) != referenced_registers.end())
continue;
// The source is inline array or immediate draw push buffer
return true;
}
case attribute_buffer_placement::persistent:
{
return true;
}
case attribute_buffer_placement::none:
{
continue;
}
default:
{
fmt::throw_exception("Unreachable");
}
}
}
return false;
}
u32 calculate_interleaved_memory_requirements(u32 first_vertex, u32 vertex_count) const
{
u32 mem = 0;
for (auto &block : interleaved_blocks)
{
const auto range = block->calculate_required_range(first_vertex, vertex_count);
mem += range.second * block->attribute_stride;
}
return mem;
}
};
struct framebuffer_layout
{
ENABLE_BITWISE_SERIALIZATION;
u16 width;
u16 height;
std::array<u32, 4> color_addresses;
std::array<u32, 4> color_pitch;
std::array<u32, 4> actual_color_pitch;
std::array<bool, 4> color_write_enabled;
u32 zeta_address;
u32 zeta_pitch;
u32 actual_zeta_pitch;
bool zeta_write_enabled;
rsx::surface_target target;
rsx::surface_color_format color_format;
rsx::surface_depth_format2 depth_format;
rsx::surface_antialiasing aa_mode;
rsx::surface_raster_type raster_type;
u32 aa_factors[2];
bool ignore_change;
};
struct frame_statistics_t
{
u32 draw_calls;
u32 submit_count;
s64 setup_time;
s64 vertex_upload_time;
s64 textures_upload_time;
s64 draw_exec_time;
s64 flip_time;
};
struct display_flip_info_t
{
std::deque<u32> buffer_queue;
u32 buffer;
bool skip_frame;
bool emu_flip;
bool in_progress;
frame_statistics_t stats;
inline void push(u32 _buffer)
{
buffer_queue.push_back(_buffer);
}
inline bool pop(u32 _buffer)
{
if (buffer_queue.empty())
{
return false;
}
do
{
const auto index = buffer_queue.front();
buffer_queue.pop_front();
if (index == _buffer)
{
buffer = _buffer;
return true;
}
}
while (!buffer_queue.empty());
// Need to observe this happening in the wild
rsx_log.error("Display queue was discarded while not empty!");
return false;
}
};
class vblank_thread
{
std::shared_ptr<named_thread<std::function<void()>>> m_thread;
public:
vblank_thread() = default;
vblank_thread(const vblank_thread&) = delete;
void set_thread(std::shared_ptr<named_thread<std::function<void()>>> thread);
vblank_thread& operator=(thread_state);
vblank_thread& operator=(const vblank_thread&) = delete;
};
struct backend_configuration
{
bool supports_multidraw; // Draw call batching
@ -493,6 +146,7 @@ namespace rsx
u64 tsc;
};
// TODO: This class is a mess, this needs to be broken into smaller chunks, like I did for RSXFIFO and RSXZCULL (kd)
class thread : public cpu_thread
{
u64 timestamp_ctrl = 0;
@ -586,7 +240,7 @@ namespace rsx
atomic_t<u64> idle_time{ 0 }; // Time spent idling in microseconds
u64 last_update_timestamp = 0; // Timestamp of last load update
u64 FIFO_idle_timestamp = 0; // Timestamp of when FIFO queue becomes idle
FIFO_state state = FIFO_state::running;
FIFO::state state = FIFO::state::running;
u32 approximate_load = 0;
u32 sampled_frames = 0;
}
@ -736,7 +390,7 @@ namespace rsx
/**
* Execute a backend local task queue
*/
virtual void do_local_task(FIFO_state state);
virtual void do_local_task(FIFO::state state);
virtual void emit_geometry(u32) {}
@ -778,7 +432,7 @@ namespace rsx
// sync
void sync();
flags32_t read_barrier(u32 memory_address, u32 memory_range, bool unconditional);
virtual void sync_hint(FIFO_hint hint, reports::sync_hint_payload_t payload);
virtual void sync_hint(FIFO::interrupt_hint hint, reports::sync_hint_payload_t payload);
virtual bool release_GCM_label(u32 /*address*/, u32 /*value*/) { return false; }
std::span<const std::byte> get_raw_index_array(const draw_clause& draw_indexed_clause) const;
@ -899,126 +553,4 @@ namespace rsx
{
return g_fxo->try_get<rsx::thread>();
}
template<bool IsFullLock = false, uint Stride = 128>
class reservation_lock
{
u32 addr = 0;
u32 length = 0;
inline void lock_range(u32 addr, u32 length)
{
if (!get_current_renderer()->iomap_table.lock<IsFullLock, Stride>(addr, length, get_current_cpu_thread()))
{
length = 0;
}
this->addr = addr;
this->length = length;
}
public:
reservation_lock(u32 addr, u32 length)
{
if (g_cfg.core.rsx_accurate_res_access &&
addr < constants::local_mem_base)
{
lock_range(addr, length);
}
}
reservation_lock(u32 addr, u32 length, bool setting)
{
if (setting)
{
lock_range(addr, length);
}
}
// Multi-range lock. If ranges overlap, the combined range will be acquired.
// If ranges do not overlap, the first range that is in main memory will be acquired.
reservation_lock(u32 dst_addr, u32 dst_length, u32 src_addr, u32 src_length)
{
if (g_cfg.core.rsx_accurate_res_access)
{
const auto range1 = utils::address_range::start_length(dst_addr, dst_length);
const auto range2 = utils::address_range::start_length(src_addr, src_length);
utils::address_range target_range;
if (!range1.overlaps(range2)) [[likely]]
{
target_range = (dst_addr < constants::local_mem_base) ? range1 : range2;
}
else
{
// Very unlikely
target_range = range1.get_min_max(range2);
}
if (target_range.start < constants::local_mem_base)
{
lock_range(target_range.start, target_range.length());
}
}
}
// Very special utility for batched transfers (SPU related)
template <typename T = void>
void update_if_enabled(u32 addr, u32 _length, const std::add_pointer_t<T>& lock_release = std::add_pointer_t<void>{})
{
// This check is not perfect but it covers the important cases fast (this check is only an optimization - forcing true disables it)
if (length && (this->addr / rsx_iomap_table::c_lock_stride != addr / rsx_iomap_table::c_lock_stride || (addr % rsx_iomap_table::c_lock_stride + _length) > rsx_iomap_table::c_lock_stride) && _length > 1)
{
if constexpr (!std::is_void_v<T>)
{
// See SPUThread.cpp
lock_release->release(0);
}
unlock();
lock_range(addr, _length);
}
}
void unlock(bool destructor = false)
{
if (length)
{
get_current_renderer()->iomap_table.unlock<IsFullLock, Stride>(addr, length);
if (!destructor)
{
length = 0;
}
}
}
~reservation_lock()
{
unlock(true);
}
};
class eng_lock
{
rsx::thread* pthr;
public:
eng_lock(rsx::thread* target)
:pthr(target)
{
if (pthr->is_current_thread())
{
pthr = nullptr;
}
else
{
pthr->pause();
}
}
~eng_lock()
{
if (pthr) pthr->unpause();
}
};
}

View File

@ -1,4 +1,6 @@
#include "stdafx.h"
#include "Core/RSXEngLock.hpp"
#include "Core/RSXReservationLock.hpp"
#include "RSXThread.h"
namespace rsx
@ -422,7 +424,7 @@ namespace rsx
if (It->query->sync_tag > m_sync_tag)
{
// rsx_log.trace("[Performance warning] Query hint emit during sync command.");
ptimer->sync_hint(FIFO_hint::hint_zcull_sync, { .query = It->query });
ptimer->sync_hint(FIFO::interrupt_hint::zcull_sync, { .query = It->query });
}
break;
@ -531,7 +533,7 @@ namespace rsx
{
if (It->query->num_draws && It->query->sync_tag > m_sync_tag)
{
ptimer->sync_hint(FIFO_hint::hint_zcull_sync, { .query = It->query });
ptimer->sync_hint(FIFO::interrupt_hint::zcull_sync, { .query = It->query });
ensure(It->query->sync_tag <= m_sync_tag);
}
@ -556,7 +558,7 @@ namespace rsx
const auto elapsed = m_tsc - front.query->timestamp;
if (elapsed > max_zcull_delay_us)
{
ptimer->sync_hint(FIFO_hint::hint_zcull_sync, { .query = front.query });
ptimer->sync_hint(FIFO::interrupt_hint::zcull_sync, { .query = front.query });
ensure(front.query->sync_tag <= m_sync_tag);
}
@ -704,7 +706,7 @@ namespace rsx
{
if (query->sync_tag > m_sync_tag) [[unlikely]]
{
ptimer->sync_hint(FIFO_hint::hint_zcull_sync, { .query = query });
ptimer->sync_hint(FIFO::interrupt_hint::zcull_sync, { .query = query });
ensure(m_sync_tag >= query->sync_tag);
}
}

View File

@ -673,7 +673,7 @@ VKGSRender::~VKGSRender()
// Flush DMA queue
while (!g_fxo->get<rsx::dma_manager>().sync())
{
do_local_task(rsx::FIFO_state::lock_wait);
do_local_task(rsx::FIFO::state::lock_wait);
}
//Wait for device to finish up with resources
@ -895,7 +895,7 @@ void VKGSRender::on_semaphore_acquire_wait()
(async_flip_requested & flip_request::emu_requested) ||
(m_queue_status & flush_queue_state::deadlock))
{
do_local_task(rsx::FIFO_state::lock_wait);
do_local_task(rsx::FIFO::state::lock_wait);
}
}
@ -1602,7 +1602,7 @@ bool VKGSRender::release_GCM_label(u32 address, u32 args)
return true;
}
void VKGSRender::sync_hint(rsx::FIFO_hint hint, rsx::reports::sync_hint_payload_t payload)
void VKGSRender::sync_hint(rsx::FIFO::interrupt_hint hint, rsx::reports::sync_hint_payload_t payload)
{
rsx::thread::sync_hint(hint, payload);
@ -1615,7 +1615,7 @@ void VKGSRender::sync_hint(rsx::FIFO_hint hint, rsx::reports::sync_hint_payload_
// Occlusion test result evaluation is coming up, avoid a hard sync
switch (hint)
{
case rsx::FIFO_hint::hint_conditional_render_eval:
case rsx::FIFO::interrupt_hint::conditional_render_eval:
{
// If a flush request is already enqueued, do nothing
if (m_flush_requests.pending())
@ -1645,7 +1645,7 @@ void VKGSRender::sync_hint(rsx::FIFO_hint hint, rsx::reports::sync_hint_payload_
m_last_cond_render_eval_hint = now;
break;
}
case rsx::FIFO_hint::hint_zcull_sync:
case rsx::FIFO::interrupt_hint::zcull_sync:
{
// Check if the required report is synced to this CB
auto& data = m_occlusion_map[payload.query->driver_handle];
@ -1672,7 +1672,7 @@ void VKGSRender::sync_hint(rsx::FIFO_hint hint, rsx::reports::sync_hint_payload_
}
}
void VKGSRender::do_local_task(rsx::FIFO_state state)
void VKGSRender::do_local_task(rsx::FIFO::state state)
{
if (m_queue_status & flush_queue_state::deadlock)
{
@ -1702,7 +1702,7 @@ void VKGSRender::do_local_task(rsx::FIFO_state state)
m_flush_queue_mutex.unlock();
}
}
else if (!in_begin_end && state != rsx::FIFO_state::lock_wait)
else if (!in_begin_end && state != rsx::FIFO::state::lock_wait)
{
if (m_graphics_state & rsx::pipeline_state::framebuffer_reads_dirty)
{
@ -1717,11 +1717,11 @@ void VKGSRender::do_local_task(rsx::FIFO_state state)
switch (state)
{
case rsx::FIFO_state::lock_wait:
case rsx::FIFO::state::lock_wait:
// Critical check finished
return;
//case rsx::FIFO_state::spinning:
//case rsx::FIFO_state::empty:
//case rsx::FIFO::state::spinning:
//case rsx::FIFO::state::empty:
// We have some time, check the present queue
//check_present_status();
//break;

View File

@ -248,7 +248,7 @@ public:
void set_scissor(bool clip_viewport);
void bind_viewport();
void sync_hint(rsx::FIFO_hint hint, rsx::reports::sync_hint_payload_t payload) override;
void sync_hint(rsx::FIFO::interrupt_hint hint, rsx::reports::sync_hint_payload_t payload) override;
bool release_GCM_label(u32 address, u32 data) override;
void begin_occlusion_query(rsx::reports::occlusion_query_info* query) override;
@ -282,7 +282,7 @@ protected:
void renderctl(u32 request_code, void* args) override;
void do_local_task(rsx::FIFO_state state) override;
void do_local_task(rsx::FIFO::state state) override;
bool scaled_image_from_memory(rsx::blit_src_info& src, rsx::blit_dst_info& dst, bool interpolate) override;
void notify_tile_unbound(u32 tile) override;

View File

@ -4,6 +4,7 @@
#include "rsx_utils.h"
#include "rsx_decode.h"
#include "Common/time.hpp"
#include "Core/RSXReservationLock.hpp"
#include "Emu/Cell/PPUCallback.h"
#include "Emu/Cell/lv2/sys_rsx.h"
#include "Emu/RSX/Common/BufferUtils.h"
@ -1278,6 +1279,11 @@ namespace rsx
out_pitch = out_bpp * out_w;
}
// An input pitch of 0 is replaced by a computed one: in_bpp * in_w.
// NOTE(review): this looks like the counterpart of the out_pitch fallback just above - confirm
// that both are meant to be applied under the same conditions.
if (in_pitch == 0)
{
in_pitch = in_bpp * in_w;
}
if (in_bpp != out_bpp)
{
is_block_transfer = false;
@ -1680,12 +1686,6 @@ namespace rsx
const u8 in_format = method_registers.nv0039_input_format();
const u32 notify = arg;
// The existing GCM commands use only the value 0x1 for inFormat and outFormat
if (in_format != 0x01 || out_format != 0x01)
{
rsx_log.error("NV0039_BUFFER_NOTIFY: Unsupported format: inFormat=%d, outFormat=%d", in_format, out_format);
}
if (!line_count || !line_length)
{
rsx_log.warning("NV0039_BUFFER_NOTIFY NOPed out: pitch(in=0x%x, out=0x%x), line(len=0x%x, cnt=0x%x), fmt(in=0x%x, out=0x%x), notify=0x%x",
@ -1734,7 +1734,28 @@ namespace rsx
(dst_offset >= src_offset && dst_offset < src_max);
}();
if (is_overlapping)
if (in_format > 1 || out_format > 1) [[ unlikely ]]
{
	// The formats are just input channel strides. You can use this to do cool tricks like gathering channels
	// Very rare, only seen in use by Destiny
	// TODO: Hw accel

	// This branch is also entered when in_format == 0 and out_format > 1. A source step of 0 would
	// never advance src_ptr, so the inner loop would never terminate while dst_ptr keeps moving
	// past the destination. Fall back to a 1-byte source step in that case.
	const u32 src_stride = in_format ? in_format : 1;

	for (u32 row = 0; row < line_count; ++row)
	{
		auto dst_ptr = dst;
		auto src_ptr = src;

		// End of the current source row; invariant for the inner loop
		const auto src_end = src + line_length;

		// Copy one element per step, advancing source and destination by their own strides
		while (src_ptr < src_end)
		{
			*dst_ptr = *src_ptr;

			src_ptr += src_stride;
			dst_ptr += out_format;
		}

		// Move both cursors to the start of the next row
		dst += out_pitch;
		src += in_pitch;
	}
}
else if (is_overlapping) [[ unlikely ]]
{
if (is_block_transfer)
{

View File

@ -525,6 +525,12 @@
<ClInclude Include="Emu\RSX\Common\simple_array.hpp" />
<ClInclude Include="Emu\RSX\Common\surface_cache_dma.hpp" />
<ClInclude Include="Emu\RSX\Common\time.hpp" />
<ClInclude Include="Emu\RSX\Core\RSXEngLock.hpp" />
<ClInclude Include="Emu\RSX\Core\RSXFrameBuffer.h" />
<ClInclude Include="Emu\RSX\Core\RSXIOMap.hpp" />
<ClInclude Include="Emu\RSX\Core\RSXDisplay.h" />
<ClInclude Include="Emu\RSX\Core\RSXReservationLock.hpp" />
<ClInclude Include="Emu\RSX\Core\RSXVertexTypes.h" />
<ClInclude Include="Emu\RSX\Overlays\overlay_cursor.h" />
<ClInclude Include="Emu\RSX\Overlays\overlay_edit_text.hpp" />
<ClInclude Include="Emu\RSX\Overlays\overlay_list_view.hpp" />

View File

@ -76,6 +76,9 @@
<Filter Include="Emu\GPU\RSX\Program\Interpreter">
<UniqueIdentifier>{bc97b324-1eea-445a-8fa9-6fc49e3df47c}</UniqueIdentifier>
</Filter>
<Filter Include="Emu\GPU\RSX\Core">
<UniqueIdentifier>{99b3a1c9-93ea-4498-86b0-1000793013fa}</UniqueIdentifier>
</Filter>
</ItemGroup>
<ItemGroup>
<ClCompile Include="Crypto\aes.cpp">
@ -2206,6 +2209,24 @@
<ClInclude Include="Emu\Io\recording_config.h">
<Filter>Emu\Io</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\Core\RSXIOMap.hpp">
<Filter>Emu\GPU\RSX\Core</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\Core\RSXDisplay.h">
<Filter>Emu\GPU\RSX\Core</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\Core\RSXVertexTypes.h">
<Filter>Emu\GPU\RSX\Core</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\Core\RSXFrameBuffer.h">
<Filter>Emu\GPU\RSX\Core</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\Core\RSXReservationLock.hpp">
<Filter>Emu\GPU\RSX\Core</Filter>
</ClInclude>
<ClInclude Include="Emu\RSX\Core\RSXEngLock.hpp">
<Filter>Emu\GPU\RSX\Core</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<None Include="Emu\RSX\Program\GLSLSnippets\GPUDeswizzle.glsl">