From b5a18b5462ce3328f76500d801021c46c0343282 Mon Sep 17 00:00:00 2001 From: Ben Vanik Date: Sun, 8 Nov 2015 11:54:36 -0800 Subject: [PATCH] Factoring out a lot of reusable GPU code from gl4/. --- src/xenia/emulator.cc | 4 +- src/xenia/gpu/command_processor.cc | 1113 +++++++++++++++ src/xenia/gpu/command_processor.h | 213 +++ ..._processor.cc => gl4_command_processor.cc} | 1253 ++--------------- ...nd_processor.h => gl4_command_processor.h} | 197 +-- src/xenia/gpu/gl4/gl4_graphics_system.cc | 293 +--- src/xenia/gpu/gl4/gl4_graphics_system.h | 42 +- ...iewer_main.cc => gl4_trace_viewer_main.cc} | 824 +---------- src/xenia/gpu/gl4/premake5.lua | 2 +- src/xenia/gpu/graphics_system.cc | 188 ++- src/xenia/gpu/graphics_system.h | 62 +- src/xenia/gpu/packet_disassembler.cc | 498 +++++++ src/xenia/gpu/packet_disassembler.h | 103 ++ src/xenia/gpu/trace_player.cc | 186 +++ src/xenia/gpu/trace_player.h | 57 + src/xenia/gpu/trace_protocol.h | 84 ++ src/xenia/gpu/trace_reader.cc | 152 ++ src/xenia/gpu/trace_reader.h | 102 ++ src/xenia/gpu/trace_writer.cc | 141 ++ src/xenia/gpu/trace_writer.h | 50 + src/xenia/gpu/tracing.cc | 49 - src/xenia/gpu/tracing.h | 195 --- src/xenia/ui/gl/gl_immediate_drawer.cc | 12 +- 23 files changed, 3074 insertions(+), 2746 deletions(-) create mode 100644 src/xenia/gpu/command_processor.cc create mode 100644 src/xenia/gpu/command_processor.h rename src/xenia/gpu/gl4/{command_processor.cc => gl4_command_processor.cc} (65%) rename src/xenia/gpu/gl4/{command_processor.h => gl4_command_processor.h} (50%) rename src/xenia/gpu/gl4/{trace_viewer_main.cc => gl4_trace_viewer_main.cc} (65%) create mode 100644 src/xenia/gpu/packet_disassembler.cc create mode 100644 src/xenia/gpu/packet_disassembler.h create mode 100644 src/xenia/gpu/trace_player.cc create mode 100644 src/xenia/gpu/trace_player.h create mode 100644 src/xenia/gpu/trace_protocol.h create mode 100644 src/xenia/gpu/trace_reader.cc create mode 100644 src/xenia/gpu/trace_reader.h create mode 100644 src/xenia/gpu/trace_writer.cc create mode 100644 src/xenia/gpu/trace_writer.h delete mode 100644 src/xenia/gpu/tracing.cc delete mode 100644 src/xenia/gpu/tracing.h diff --git a/src/xenia/emulator.cc b/src/xenia/emulator.cc index 62ffcd31f..4319249f1 100644 --- a/src/xenia/emulator.cc +++ b/src/xenia/emulator.cc @@ -117,7 +117,7 @@ X_STATUS Emulator::Setup(ui::Window* display_window) { } // Initialize the GPU. - graphics_system_ = xe::gpu::GraphicsSystem::Create(this); + graphics_system_ = xe::gpu::GraphicsSystem::Create(); if (!graphics_system_) { return X_STATUS_NOT_IMPLEMENTED; } @@ -144,7 +144,7 @@ X_STATUS Emulator::Setup(ui::Window* display_window) { kernel_state_ = std::make_unique(this); // Setup the core components. - result = graphics_system_->Setup(processor_.get(), display_window_->loop(), + result = graphics_system_->Setup(processor_.get(), kernel_state_.get(), display_window_); if (result) { return result; diff --git a/src/xenia/gpu/command_processor.cc b/src/xenia/gpu/command_processor.cc new file mode 100644 index 000000000..f36b43835 --- /dev/null +++ b/src/xenia/gpu/command_processor.cc @@ -0,0 +1,1113 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2015 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/gpu/command_processor.h" + +#include + +#include "xenia/base/logging.h" +#include "xenia/base/math.h" +#include "xenia/base/profiling.h" +#include "xenia/gpu/gpu_flags.h" +#include "xenia/gpu/graphics_system.h" +#include "xenia/gpu/sampler_info.h" +#include "xenia/gpu/texture_info.h" +#include "xenia/gpu/xenos.h" + +namespace xe { +namespace gpu { + +using namespace xe::gpu::xenos; + +CommandProcessor::CommandProcessor(GraphicsSystem* graphics_system, + kernel::KernelState* kernel_state) + : memory_(graphics_system->memory()), + kernel_state_(kernel_state), + graphics_system_(graphics_system), + register_file_(graphics_system_->register_file()), + trace_writer_(graphics_system->memory()->physical_membase()), + worker_running_(true), + write_ptr_index_event_(xe::threading::Event::CreateAutoResetEvent(false)), + write_ptr_index_(0) {} + +CommandProcessor::~CommandProcessor() = default; + +bool CommandProcessor::Initialize( + std::unique_ptr context) { + context_ = std::move(context); + + worker_running_ = true; + worker_thread_ = kernel::object_ref( + new kernel::XHostThread(kernel_state_, 128 * 1024, 0, [this]() { + WorkerThreadMain(); + return 0; + })); + worker_thread_->set_name("GraphicsSystem Command Processor"); + worker_thread_->Create(); + + return true; +} + +void CommandProcessor::Shutdown() { + EndTracing(); + + worker_running_ = false; + write_ptr_index_event_->Set(); + worker_thread_->Wait(0, 0, 0, nullptr); + worker_thread_.reset(); +} + +void CommandProcessor::RequestFrameTrace(const std::wstring& root_path) { + if (trace_state_ == TraceState::kStreaming) { + XELOGE("Streaming trace; cannot also trace frame."); + return; + } + if (trace_state_ == TraceState::kSingleFrame) { + XELOGE("Frame trace already pending; ignoring."); + return; + } + trace_state_ = TraceState::kSingleFrame; + trace_frame_path_ = root_path; +} + +void CommandProcessor::BeginTracing(const std::wstring& root_path) { + if (trace_state_ == TraceState::kStreaming) { + XELOGE("Streaming already active; ignoring request."); + return; + } + if (trace_state_ == TraceState::kSingleFrame) { + XELOGE("Frame trace pending; ignoring streaming request."); + return; + } + std::wstring path = root_path + L"stream"; + trace_state_ = TraceState::kStreaming; + trace_writer_.Open(path); +} + +void CommandProcessor::EndTracing() { + if (!trace_writer_.is_open()) { + return; + } + assert_true(trace_state_ == TraceState::kStreaming); + trace_writer_.Close(); +} + +void CommandProcessor::CallInThread(std::function fn) { + if (pending_fns_.empty() && + kernel::XThread::IsInThread(worker_thread_.get())) { + fn(); + } else { + pending_fns_.push(std::move(fn)); + } +} + +void CommandProcessor::ClearCaches() {} + +void CommandProcessor::WorkerThreadMain() { + context_->MakeCurrent(); + if (!SetupContext()) { + xe::FatalError("Unable to setup command processor GL state"); + return; + } + + while (worker_running_) { + while (!pending_fns_.empty()) { + auto fn = std::move(pending_fns_.front()); + pending_fns_.pop(); + fn(); + } + + uint32_t write_ptr_index = write_ptr_index_.load(); + if (write_ptr_index == 0xBAADF00D || read_ptr_index_ == write_ptr_index) { + SCOPE_profile_cpu_i("gpu", "xe::gpu::gl4::CommandProcessor::Stall"); + // We've run out of commands to execute. + // We spin here waiting for new ones, as the overhead of waiting on our + // event is too high. + PrepareForWait(); + do { + // TODO(benvanik): if we go longer than Nms, switch to waiting? + // It'll keep us from burning power. + // const int wait_time_ms = 5; + // xe::threading::Wait(write_ptr_index_event_.get(), true, + // std::chrono::milliseconds(wait_time_ms)); + xe::threading::MaybeYield(); + write_ptr_index = write_ptr_index_.load(); + } while (worker_running_ && pending_fns_.empty() && + (write_ptr_index == 0xBAADF00D || + read_ptr_index_ == write_ptr_index)); + ReturnFromWait(); + if (!worker_running_ || !pending_fns_.empty()) { + continue; + } + } + assert_true(read_ptr_index_ != write_ptr_index); + + // Execute. Note that we handle wraparound transparently. + ExecutePrimaryBuffer(read_ptr_index_, write_ptr_index); + read_ptr_index_ = write_ptr_index; + + // TODO(benvanik): use reader->Read_update_freq_ and only issue after moving + // that many indices. + if (read_ptr_writeback_ptr_) { + xe::store_and_swap( + memory_->TranslatePhysical(read_ptr_writeback_ptr_), read_ptr_index_); + } + } + + ShutdownContext(); +} + +bool CommandProcessor::SetupContext() { return true; } + +void CommandProcessor::ShutdownContext() { context_.reset(); } + +void CommandProcessor::InitializeRingBuffer(uint32_t ptr, uint32_t page_count) { + primary_buffer_ptr_ = ptr; + // Not sure this is correct, but it's a way to take the page_count back to + // the number of bytes allocated by the physical alloc. + uint32_t original_size = 1 << (0x1C - page_count - 1); + primary_buffer_size_ = original_size; +} + +void CommandProcessor::EnableReadPointerWriteBack(uint32_t ptr, + uint32_t block_size) { + // CP_RB_RPTR_ADDR Ring Buffer Read Pointer Address 0x70C + // ptr = RB_RPTR_ADDR, pointer to write back the address to. + read_ptr_writeback_ptr_ = ptr; + // CP_RB_CNTL Ring Buffer Control 0x704 + // block_size = RB_BLKSZ, number of quadwords read between updates of the + // read pointer. + read_ptr_update_freq_ = + static_cast(pow(2.0, static_cast(block_size)) / 4); +} + +void CommandProcessor::UpdateWritePointer(uint32_t value) { + write_ptr_index_ = value; + write_ptr_index_event_->Set(); +} + +void CommandProcessor::WriteRegister(uint32_t index, uint32_t value) { + RegisterFile* regs = register_file_; + if (index >= RegisterFile::kRegisterCount) { + XELOGW("CommandProcessor::WriteRegister index out of bounds: %d", index); + return; + } + + regs->values[index].u32 = value; + + // If this is a COHER register, set the dirty flag. + // This will block the command processor the next time it WAIT_MEM_REGs and + // allow us to synchronize the memory. + if (index == XE_GPU_REG_COHER_STATUS_HOST) { + regs->values[index].u32 |= 0x80000000ul; + } + + // Scratch register writeback. + if (index >= XE_GPU_REG_SCRATCH_REG0 && index <= XE_GPU_REG_SCRATCH_REG7) { + uint32_t scratch_reg = index - XE_GPU_REG_SCRATCH_REG0; + if ((1 << scratch_reg) & regs->values[XE_GPU_REG_SCRATCH_UMSK].u32) { + // Enabled - write to address. + uint32_t scratch_addr = regs->values[XE_GPU_REG_SCRATCH_ADDR].u32; + uint32_t mem_addr = scratch_addr + (scratch_reg * 4); + xe::store_and_swap(memory_->TranslatePhysical(mem_addr), value); + } + } +} + +void CommandProcessor::MakeCoherent() { + SCOPE_profile_cpu_f("gpu"); + + // Status host often has 0x01000000 or 0x03000000. + // This is likely toggling VC (vertex cache) or TC (texture cache). + // Or, it also has a direction in here maybe - there is probably + // some way to check for dest coherency (what all the COHER_DEST_BASE_* + // registers are for). + // Best docs I've found on this are here: + // http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2013/10/R6xx_R7xx_3D.pdf + // http://cgit.freedesktop.org/xorg/driver/xf86-video-radeonhd/tree/src/r6xx_accel.c?id=3f8b6eccd9dba116cc4801e7f80ce21a879c67d2#n454 + + RegisterFile* regs = register_file_; + auto status_host = regs->values[XE_GPU_REG_COHER_STATUS_HOST].u32; + // auto base_host = regs->values[XE_GPU_REG_COHER_BASE_HOST].u32; + // auto size_host = regs->values[XE_GPU_REG_COHER_SIZE_HOST].u32; + + if (!(status_host & 0x80000000ul)) { + return; + } + + // TODO(benvanik): notify resource cache of base->size and type. + // XELOGD("Make %.8X -> %.8X (%db) coherent", base_host, base_host + + // size_host, size_host); + + // Mark coherent. + status_host &= ~0x80000000ul; + regs->values[XE_GPU_REG_COHER_STATUS_HOST].u32 = status_host; +} + +void CommandProcessor::PrepareForWait() { trace_writer_.Flush(); } + +void CommandProcessor::ReturnFromWait() {} + +void CommandProcessor::IssueSwap(uint32_t frontbuffer_ptr, + uint32_t frontbuffer_width, + uint32_t frontbuffer_height) { + SCOPE_profile_cpu_f("gpu"); + if (!swap_request_handler_) { + return; + } + + // If there was a swap pending we drop it on the floor. + // This prevents the display from pulling the backbuffer out from under us. + // If we skip a lot then we may need to buffer more, but as the display + // thread should be fairly idle that shouldn't happen. + if (!FLAGS_vsync) { + std::lock_guard lock(swap_state_.mutex); + if (swap_state_.pending) { + swap_state_.pending = false; + // TODO(benvanik): frame skip counter. + XELOGW("Skipped frame!"); + } + } else { + // Spin until no more pending swap. + while (true) { + { + std::lock_guard lock(swap_state_.mutex); + if (!swap_state_.pending) { + break; + } + } + xe::threading::MaybeYield(); + } + } + + PerformSwap(frontbuffer_ptr, frontbuffer_width, frontbuffer_height); + + { + // Set pending so that the display will swap the next time it can. + std::lock_guard lock(swap_state_.mutex); + swap_state_.pending = true; + } + + // Notify the display a swap is pending so that our changes are picked up. + // It does the actual front/back buffer swap. + swap_request_handler_(); +} + +class CommandProcessor::RingbufferReader { + public: + RingbufferReader(uint8_t* membase, uint32_t base_ptr, uint32_t ptr_mask, + uint32_t start_ptr, uint32_t end_ptr) + : membase_(membase), + base_ptr_(base_ptr), + ptr_mask_(ptr_mask), + end_ptr_(end_ptr), + ptr_(start_ptr), + offset_(0) {} + + uint32_t ptr() const { return ptr_; } + uint32_t offset() const { return offset_; } + bool can_read() const { return ptr_ != end_ptr_; } + + uint32_t Peek() { return xe::load_and_swap(membase_ + ptr_); } + + void CheckRead(uint32_t words) { + assert_true(ptr_ + words * sizeof(uint32_t) <= end_ptr_); + } + + uint32_t Read() { + uint32_t value = xe::load_and_swap(membase_ + ptr_); + Advance(1); + return value; + } + + void Advance(uint32_t words) { + offset_ += words; + ptr_ = ptr_ + words * sizeof(uint32_t); + if (ptr_mask_) { + ptr_ = base_ptr_ + + (((ptr_ - base_ptr_) / sizeof(uint32_t)) & ptr_mask_) * + sizeof(uint32_t); + } + } + + void Skip(uint32_t words) { Advance(words); } + + private: + uint8_t* membase_; + + uint32_t base_ptr_; + uint32_t ptr_mask_; + uint32_t end_ptr_; + uint32_t ptr_; + uint32_t offset_; +}; + +void CommandProcessor::ExecutePrimaryBuffer(uint32_t start_index, + uint32_t end_index) { + SCOPE_profile_cpu_f("gpu"); + + // Adjust pointer base. + uint32_t start_ptr = primary_buffer_ptr_ + start_index * sizeof(uint32_t); + start_ptr = (primary_buffer_ptr_ & ~0x1FFFFFFF) | (start_ptr & 0x1FFFFFFF); + uint32_t end_ptr = primary_buffer_ptr_ + end_index * sizeof(uint32_t); + end_ptr = (primary_buffer_ptr_ & ~0x1FFFFFFF) | (end_ptr & 0x1FFFFFFF); + + trace_writer_.WritePrimaryBufferStart(start_ptr, end_index - start_index); + + // Execute commands! + uint32_t ptr_mask = (primary_buffer_size_ / sizeof(uint32_t)) - 1; + RingbufferReader reader(memory_->physical_membase(), primary_buffer_ptr_, + ptr_mask, start_ptr, end_ptr); + while (reader.can_read()) { + ExecutePacket(&reader); + } + if (end_index > start_index) { + assert_true(reader.offset() == (end_index - start_index)); + } + + trace_writer_.WritePrimaryBufferEnd(); +} + +void CommandProcessor::ExecuteIndirectBuffer(uint32_t ptr, uint32_t length) { + SCOPE_profile_cpu_f("gpu"); + + trace_writer_.WriteIndirectBufferStart(ptr, length / sizeof(uint32_t)); + + // Execute commands! + uint32_t ptr_mask = 0; + RingbufferReader reader(memory_->physical_membase(), primary_buffer_ptr_, + ptr_mask, ptr, ptr + length * sizeof(uint32_t)); + while (reader.can_read()) { + ExecutePacket(&reader); + } + + trace_writer_.WriteIndirectBufferEnd(); +} + +void CommandProcessor::ExecutePacket(uint32_t ptr, uint32_t count) { + uint32_t ptr_mask = 0; + RingbufferReader reader(memory_->physical_membase(), primary_buffer_ptr_, + ptr_mask, ptr, ptr + count * sizeof(uint32_t)); + while (reader.can_read()) { + ExecutePacket(&reader); + } +} + +bool CommandProcessor::ExecutePacket(RingbufferReader* reader) { + const uint32_t packet = reader->Read(); + const uint32_t packet_type = packet >> 30; + if (packet == 0) { + trace_writer_.WritePacketStart(reader->ptr() - 4, 1); + trace_writer_.WritePacketEnd(); + return true; + } + + switch (packet_type) { + case 0x00: + return ExecutePacketType0(reader, packet); + case 0x01: + return ExecutePacketType1(reader, packet); + case 0x02: + return ExecutePacketType2(reader, packet); + case 0x03: + return ExecutePacketType3(reader, packet); + default: + assert_unhandled_case(packet_type); + return false; + } +} + +bool CommandProcessor::ExecutePacketType0(RingbufferReader* reader, + uint32_t packet) { + // Type-0 packet. + // Write count registers in sequence to the registers starting at + // (base_index << 2). + + uint32_t count = ((packet >> 16) & 0x3FFF) + 1; + trace_writer_.WritePacketStart(reader->ptr() - 4, 1 + count); + + uint32_t base_index = (packet & 0x7FFF); + uint32_t write_one_reg = (packet >> 15) & 0x1; + for (uint32_t m = 0; m < count; m++) { + uint32_t reg_data = reader->Read(); + uint32_t target_index = write_one_reg ? base_index : base_index + m; + WriteRegister(target_index, reg_data); + } + + trace_writer_.WritePacketEnd(); + return true; +} + +bool CommandProcessor::ExecutePacketType1(RingbufferReader* reader, + uint32_t packet) { + // Type-1 packet. + // Contains two registers of data. Type-0 should be more common. + trace_writer_.WritePacketStart(reader->ptr() - 4, 3); + uint32_t reg_index_1 = packet & 0x7FF; + uint32_t reg_index_2 = (packet >> 11) & 0x7FF; + uint32_t reg_data_1 = reader->Read(); + uint32_t reg_data_2 = reader->Read(); + WriteRegister(reg_index_1, reg_data_1); + WriteRegister(reg_index_2, reg_data_2); + trace_writer_.WritePacketEnd(); + return true; +} + +bool CommandProcessor::ExecutePacketType2(RingbufferReader* reader, + uint32_t packet) { + // Type-2 packet. + // No-op. Do nothing. + trace_writer_.WritePacketStart(reader->ptr() - 4, 1); + trace_writer_.WritePacketEnd(); + return true; +} + +bool CommandProcessor::ExecutePacketType3(RingbufferReader* reader, + uint32_t packet) { + // Type-3 packet. + uint32_t opcode = (packet >> 8) & 0x7F; + uint32_t count = ((packet >> 16) & 0x3FFF) + 1; + auto data_start_offset = reader->offset(); + + // To handle nesting behavior when tracing we special case indirect buffers. + if (opcode == PM4_INDIRECT_BUFFER) { + trace_writer_.WritePacketStart(reader->ptr() - 4, 2); + } else { + trace_writer_.WritePacketStart(reader->ptr() - 4, 1 + count); + } + + // & 1 == predicate - when set, we do bin check to see if we should execute + // the packet. Only type 3 packets are affected. + // We also skip predicated swaps, as they are never valid (probably?). + if (packet & 1) { + bool any_pass = (bin_select_ & bin_mask_) != 0; + if (!any_pass || opcode == PM4_XE_SWAP) { + reader->Skip(count); + trace_writer_.WritePacketEnd(); + return true; + } + } + + bool result = false; + switch (opcode) { + case PM4_ME_INIT: + result = ExecutePacketType3_ME_INIT(reader, packet, count); + break; + case PM4_NOP: + result = ExecutePacketType3_NOP(reader, packet, count); + break; + case PM4_INTERRUPT: + result = ExecutePacketType3_INTERRUPT(reader, packet, count); + break; + case PM4_XE_SWAP: + result = ExecutePacketType3_XE_SWAP(reader, packet, count); + break; + case PM4_INDIRECT_BUFFER: + result = ExecutePacketType3_INDIRECT_BUFFER(reader, packet, count); + break; + case PM4_WAIT_REG_MEM: + result = ExecutePacketType3_WAIT_REG_MEM(reader, packet, count); + break; + case PM4_REG_RMW: + result = ExecutePacketType3_REG_RMW(reader, packet, count); + break; + case PM4_COND_WRITE: + result = ExecutePacketType3_COND_WRITE(reader, packet, count); + break; + case PM4_EVENT_WRITE: + result = ExecutePacketType3_EVENT_WRITE(reader, packet, count); + break; + case PM4_EVENT_WRITE_SHD: + result = ExecutePacketType3_EVENT_WRITE_SHD(reader, packet, count); + break; + case PM4_EVENT_WRITE_EXT: + result = ExecutePacketType3_EVENT_WRITE_EXT(reader, packet, count); + break; + case PM4_DRAW_INDX: + result = ExecutePacketType3_DRAW_INDX(reader, packet, count); + break; + case PM4_DRAW_INDX_2: + result = ExecutePacketType3_DRAW_INDX_2(reader, packet, count); + break; + case PM4_SET_CONSTANT: + result = ExecutePacketType3_SET_CONSTANT(reader, packet, count); + break; + case PM4_SET_CONSTANT2: + result = ExecutePacketType3_SET_CONSTANT2(reader, packet, count); + break; + case PM4_LOAD_ALU_CONSTANT: + result = ExecutePacketType3_LOAD_ALU_CONSTANT(reader, packet, count); + break; + case PM4_SET_SHADER_CONSTANTS: + result = ExecutePacketType3_SET_SHADER_CONSTANTS(reader, packet, count); + break; + case PM4_IM_LOAD: + result = ExecutePacketType3_IM_LOAD(reader, packet, count); + break; + case PM4_IM_LOAD_IMMEDIATE: + result = ExecutePacketType3_IM_LOAD_IMMEDIATE(reader, packet, count); + break; + case PM4_INVALIDATE_STATE: + result = ExecutePacketType3_INVALIDATE_STATE(reader, packet, count); + break; + + case PM4_SET_BIN_MASK_LO: { + uint32_t value = reader->Read(); + bin_mask_ = (bin_mask_ & 0xFFFFFFFF00000000ull) | value; + result = true; + } break; + case PM4_SET_BIN_MASK_HI: { + uint32_t value = reader->Read(); + bin_mask_ = + (bin_mask_ & 0xFFFFFFFFull) | (static_cast(value) << 32); + result = true; + } break; + case PM4_SET_BIN_SELECT_LO: { + uint32_t value = reader->Read(); + bin_select_ = (bin_select_ & 0xFFFFFFFF00000000ull) | value; + result = true; + } break; + case PM4_SET_BIN_SELECT_HI: { + uint32_t value = reader->Read(); + bin_select_ = + (bin_select_ & 0xFFFFFFFFull) | (static_cast(value) << 32); + result = true; + } break; + + // Ignored packets - useful if breaking on the default handler below. + case 0x50: // 0xC0015000 usually 2 words, 0xFFFFFFFF / 0x00000000 + case 0x51: // 0xC0015100 usually 2 words, 0xFFFFFFFF / 0xFFFFFFFF + reader->Skip(count); + break; + + default: + reader->Skip(count); + break; + } + + trace_writer_.WritePacketEnd(); + assert_true(reader->offset() == data_start_offset + count); + return result; +} + +bool CommandProcessor::ExecutePacketType3_ME_INIT(RingbufferReader* reader, + uint32_t packet, + uint32_t count) { + // initialize CP's micro-engine + reader->Advance(count); + return true; +} + +bool CommandProcessor::ExecutePacketType3_NOP(RingbufferReader* reader, + uint32_t packet, uint32_t count) { + // skip N 32-bit words to get to the next packet + // No-op, ignore some data. + reader->Advance(count); + return true; +} + +bool CommandProcessor::ExecutePacketType3_INTERRUPT(RingbufferReader* reader, + uint32_t packet, + uint32_t count) { + SCOPE_profile_cpu_f("gpu"); + + // generate interrupt from the command stream + uint32_t cpu_mask = reader->Read(); + for (int n = 0; n < 6; n++) { + if (cpu_mask & (1 << n)) { + graphics_system_->DispatchInterruptCallback(1, n); + } + } + return true; +} + +bool CommandProcessor::ExecutePacketType3_XE_SWAP(RingbufferReader* reader, + uint32_t packet, + uint32_t count) { + SCOPE_profile_cpu_f("gpu"); + + XELOGI("XE_SWAP"); + + // Xenia-specific VdSwap hook. + // VdSwap will post this to tell us we need to swap the screen/fire an + // interrupt. + // 63 words here, but only the first has any data. + uint32_t magic = reader->Read(); + assert_true(magic == 'SWAP'); + + // TODO(benvanik): only swap frontbuffer ptr. + uint32_t frontbuffer_ptr = reader->Read(); + uint32_t frontbuffer_width = reader->Read(); + uint32_t frontbuffer_height = reader->Read(); + reader->Advance(count - 4); + + if (swap_mode_ == SwapMode::kNormal) { + IssueSwap(frontbuffer_ptr, frontbuffer_width, frontbuffer_height); + } + + if (trace_writer_.is_open()) { + trace_writer_.WriteEvent(EventType::kSwap); + trace_writer_.Flush(); + if (trace_state_ == TraceState::kSingleFrame) { + trace_state_ = TraceState::kDisabled; + trace_writer_.Close(); + } + } else if (trace_state_ == TraceState::kSingleFrame) { + // New trace request - we only start tracing at the beginning of a frame. + auto frame_number = L"frame_" + std::to_wstring(counter_); + auto path = trace_frame_path_ + frame_number; + trace_writer_.Open(path); + } + ++counter_; + return true; +} + +bool CommandProcessor::ExecutePacketType3_INDIRECT_BUFFER( + RingbufferReader* reader, uint32_t packet, uint32_t count) { + // indirect buffer dispatch + uint32_t list_ptr = CpuToGpu(reader->Read()); + uint32_t list_length = reader->Read(); + ExecuteIndirectBuffer(GpuToCpu(list_ptr), list_length); + return true; +} + +bool CommandProcessor::ExecutePacketType3_WAIT_REG_MEM(RingbufferReader* reader, + uint32_t packet, + uint32_t count) { + SCOPE_profile_cpu_f("gpu"); + + // wait until a register or memory location is a specific value + uint32_t wait_info = reader->Read(); + uint32_t poll_reg_addr = reader->Read(); + uint32_t ref = reader->Read(); + uint32_t mask = reader->Read(); + uint32_t wait = reader->Read(); + bool matched = false; + do { + uint32_t value; + if (wait_info & 0x10) { + // Memory. + auto endianness = static_cast(poll_reg_addr & 0x3); + poll_reg_addr &= ~0x3; + value = xe::load(memory_->TranslatePhysical(poll_reg_addr)); + value = GpuSwap(value, endianness); + trace_writer_.WriteMemoryRead(CpuToGpu(poll_reg_addr), 4); + } else { + // Register. + assert_true(poll_reg_addr < RegisterFile::kRegisterCount); + value = register_file_->values[poll_reg_addr].u32; + if (poll_reg_addr == XE_GPU_REG_COHER_STATUS_HOST) { + MakeCoherent(); + value = register_file_->values[poll_reg_addr].u32; + } + } + switch (wait_info & 0x7) { + case 0x0: // Never. + matched = false; + break; + case 0x1: // Less than reference. + matched = (value & mask) < ref; + break; + case 0x2: // Less than or equal to reference. + matched = (value & mask) <= ref; + break; + case 0x3: // Equal to reference. + matched = (value & mask) == ref; + break; + case 0x4: // Not equal to reference. + matched = (value & mask) != ref; + break; + case 0x5: // Greater than or equal to reference. + matched = (value & mask) >= ref; + break; + case 0x6: // Greater than reference. + matched = (value & mask) > ref; + break; + case 0x7: // Always + matched = true; + break; + } + if (!matched) { + // Wait. + if (wait >= 0x100) { + PrepareForWait(); + if (!FLAGS_vsync) { + // User wants it fast and dangerous. + xe::threading::MaybeYield(); + } else { + xe::threading::Sleep(std::chrono::milliseconds(wait / 0x100)); + } + xe::threading::SyncMemory(); + ReturnFromWait(); + } else { + xe::threading::MaybeYield(); + } + } + } while (!matched); + return true; +} + +bool CommandProcessor::ExecutePacketType3_REG_RMW(RingbufferReader* reader, + + uint32_t packet, + uint32_t count) { + // register read/modify/write + // ? (used during shader upload and edram setup) + uint32_t rmw_info = reader->Read(); + uint32_t and_mask = reader->Read(); + uint32_t or_mask = reader->Read(); + uint32_t value = register_file_->values[rmw_info & 0x1FFF].u32; + if ((rmw_info >> 30) & 0x1) { + // | reg + value |= register_file_->values[or_mask & 0x1FFF].u32; + } else { + // | imm + value |= or_mask; + } + if ((rmw_info >> 31) & 0x1) { + // & reg + value &= register_file_->values[and_mask & 0x1FFF].u32; + } else { + // & imm + value &= and_mask; + } + WriteRegister(rmw_info & 0x1FFF, value); + return true; +} + +bool CommandProcessor::ExecutePacketType3_COND_WRITE(RingbufferReader* reader, + uint32_t packet, + uint32_t count) { + // conditional write to memory or register + uint32_t wait_info = reader->Read(); + uint32_t poll_reg_addr = reader->Read(); + uint32_t ref = reader->Read(); + uint32_t mask = reader->Read(); + uint32_t write_reg_addr = reader->Read(); + uint32_t write_data = reader->Read(); + uint32_t value; + if (wait_info & 0x10) { + // Memory. + auto endianness = static_cast(poll_reg_addr & 0x3); + poll_reg_addr &= ~0x3; + trace_writer_.WriteMemoryRead(CpuToGpu(poll_reg_addr), 4); + value = xe::load(memory_->TranslatePhysical(poll_reg_addr)); + value = GpuSwap(value, endianness); + } else { + // Register. + assert_true(poll_reg_addr < RegisterFile::kRegisterCount); + value = register_file_->values[poll_reg_addr].u32; + } + bool matched = false; + switch (wait_info & 0x7) { + case 0x0: // Never. + matched = false; + break; + case 0x1: // Less than reference. + matched = (value & mask) < ref; + break; + case 0x2: // Less than or equal to reference. + matched = (value & mask) <= ref; + break; + case 0x3: // Equal to reference. + matched = (value & mask) == ref; + break; + case 0x4: // Not equal to reference. + matched = (value & mask) != ref; + break; + case 0x5: // Greater than or equal to reference. + matched = (value & mask) >= ref; + break; + case 0x6: // Greater than reference. + matched = (value & mask) > ref; + break; + case 0x7: // Always + matched = true; + break; + } + if (matched) { + // Write. + if (wait_info & 0x100) { + // Memory. + auto endianness = static_cast(write_reg_addr & 0x3); + write_reg_addr &= ~0x3; + write_data = GpuSwap(write_data, endianness); + xe::store(memory_->TranslatePhysical(write_reg_addr), write_data); + trace_writer_.WriteMemoryWrite(CpuToGpu(write_reg_addr), 4); + } else { + // Register. + WriteRegister(write_reg_addr, write_data); + } + } + return true; +} + +bool CommandProcessor::ExecutePacketType3_EVENT_WRITE(RingbufferReader* reader, + uint32_t packet, + uint32_t count) { + // generate an event that creates a write to memory when completed + uint32_t initiator = reader->Read(); + // Writeback initiator. + WriteRegister(XE_GPU_REG_VGT_EVENT_INITIATOR, initiator & 0x3F); + if (count == 1) { + // Just an event flag? Where does this write? + } else { + // Write to an address. + assert_always(); + reader->Advance(count - 1); + } + return true; +} + +bool CommandProcessor::ExecutePacketType3_EVENT_WRITE_SHD( + RingbufferReader* reader, uint32_t packet, uint32_t count) { + // generate a VS|PS_done event + uint32_t initiator = reader->Read(); + uint32_t address = reader->Read(); + uint32_t value = reader->Read(); + // Writeback initiator. + WriteRegister(XE_GPU_REG_VGT_EVENT_INITIATOR, initiator & 0x3F); + uint32_t data_value; + if ((initiator >> 31) & 0x1) { + // Write counter (GPU vblank counter?). + data_value = counter_; + } else { + // Write value. + data_value = value; + } + auto endianness = static_cast(address & 0x3); + address &= ~0x3; + data_value = GpuSwap(data_value, endianness); + xe::store(memory_->TranslatePhysical(address), data_value); + trace_writer_.WriteMemoryWrite(CpuToGpu(address), 4); + return true; +} + +bool CommandProcessor::ExecutePacketType3_EVENT_WRITE_EXT( + RingbufferReader* reader, uint32_t packet, uint32_t count) { + // generate a screen extent event + uint32_t initiator = reader->Read(); + uint32_t address = reader->Read(); + // Writeback initiator. + WriteRegister(XE_GPU_REG_VGT_EVENT_INITIATOR, initiator & 0x3F); + auto endianness = static_cast(address & 0x3); + address &= ~0x3; + // Let us hope we can fake this. + uint16_t extents[] = { + 0 >> 3, // min x + 2560 >> 3, // max x + 0 >> 3, // min y + 2560 >> 3, // max y + 0, // min z + 1, // max z + }; + assert_true(endianness == xenos::Endian::k8in16); + xe::copy_and_swap_16_aligned( + reinterpret_cast(memory_->TranslatePhysical(address)), extents, + xe::countof(extents)); + trace_writer_.WriteMemoryWrite(CpuToGpu(address), sizeof(extents)); + return true; +} + +bool CommandProcessor::ExecutePacketType3_DRAW_INDX(RingbufferReader* reader, + uint32_t packet, + uint32_t count) { + // initiate fetch of index buffer and draw + // dword0 = viz query info + /*uint32_t dword0 =*/reader->Read(); + uint32_t dword1 = reader->Read(); + uint32_t index_count = dword1 >> 16; + auto prim_type = static_cast(dword1 & 0x3F); + bool is_indexed = false; + IndexBufferInfo index_buffer_info; + uint32_t src_sel = (dword1 >> 6) & 0x3; + if (src_sel == 0x0) { + // Indexed draw. + is_indexed = true; + index_buffer_info.guest_base = reader->Read(); + uint32_t index_size = reader->Read(); + index_buffer_info.endianness = static_cast(index_size >> 30); + index_size &= 0x00FFFFFF; + bool index_32bit = (dword1 >> 11) & 0x1; + index_buffer_info.format = + index_32bit ? IndexFormat::kInt32 : IndexFormat::kInt16; + index_size *= index_32bit ? 4 : 2; + index_buffer_info.length = index_size; + index_buffer_info.count = index_count; + } else if (src_sel == 0x2) { + // Auto draw. + index_buffer_info.guest_base = 0; + index_buffer_info.length = 0; + } else { + // Unknown source select. + assert_always(); + } + return IssueDraw(prim_type, index_count, + is_indexed ? &index_buffer_info : nullptr); +} + +bool CommandProcessor::ExecutePacketType3_DRAW_INDX_2(RingbufferReader* reader, + uint32_t packet, + uint32_t count) { + // draw using supplied indices in packet + uint32_t dword0 = reader->Read(); + uint32_t index_count = dword0 >> 16; + auto prim_type = static_cast(dword0 & 0x3F); + uint32_t src_sel = (dword0 >> 6) & 0x3; + assert_true(src_sel == 0x2); // 'SrcSel=AutoIndex' + // Index buffer unused as automatic. + // bool index_32bit = (dword0 >> 11) & 0x1; + // uint32_t indices_size = index_count * (index_32bit ? 4 : 2); + // uint32_t index_ptr = reader->ptr(); + reader->Advance(count - 1); + return IssueDraw(prim_type, index_count, nullptr); +} + +bool CommandProcessor::ExecutePacketType3_SET_CONSTANT(RingbufferReader* reader, + uint32_t packet, + uint32_t count) { + // load constant into chip and to memory + // PM4_REG(reg) ((0x4 << 16) | (GSL_HAL_SUBBLOCK_OFFSET(reg))) + // reg - 0x2000 + uint32_t offset_type = reader->Read(); + uint32_t index = offset_type & 0x7FF; + uint32_t type = (offset_type >> 16) & 0xFF; + switch (type) { + case 0: // ALU + index += 0x4000; + break; + case 1: // FETCH + index += 0x4800; + break; + case 2: // BOOL + index += 0x4900; + break; + case 3: // LOOP + index += 0x4908; + break; + case 4: // REGISTERS + index += 0x2000; + break; + default: + assert_always(); + reader->Skip(count - 1); + return true; + } + for (uint32_t n = 0; n < count - 1; n++, index++) { + uint32_t data = reader->Read(); + WriteRegister(index, data); + } + return true; +} + +bool CommandProcessor::ExecutePacketType3_SET_CONSTANT2( + RingbufferReader* reader, uint32_t packet, uint32_t count) { + uint32_t offset_type = reader->Read(); + uint32_t index = offset_type & 0xFFFF; + for (uint32_t n = 0; n < count - 1; n++, index++) { + uint32_t data = reader->Read(); + WriteRegister(index, data); + } + return true; +} + +bool CommandProcessor::ExecutePacketType3_LOAD_ALU_CONSTANT( + RingbufferReader* reader, uint32_t packet, uint32_t count) { + // load constants from memory + uint32_t address = reader->Read(); + address &= 0x3FFFFFFF; + uint32_t offset_type = reader->Read(); + uint32_t index = offset_type & 0x7FF; + uint32_t size_dwords = reader->Read(); + size_dwords &= 0xFFF; + uint32_t type = (offset_type >> 16) & 0xFF; + switch (type) { + case 0: // ALU + index += 0x4000; + break; + case 1: // FETCH + index += 0x4800; + break; + case 2: // BOOL + index += 0x4900; + break; + case 3: // LOOP + index += 0x4908; + break; + case 4: // REGISTERS + index += 0x2000; + break; + default: + assert_always(); + return true; + } + trace_writer_.WriteMemoryRead(CpuToGpu(address), size_dwords * 4); + for (uint32_t n = 0; n < size_dwords; n++, index++) { + uint32_t data = xe::load_and_swap( + memory_->TranslatePhysical(address + n * 4)); + WriteRegister(index, data); + } + return true; +} + +bool CommandProcessor::ExecutePacketType3_SET_SHADER_CONSTANTS( + RingbufferReader* reader, uint32_t packet, uint32_t count) { + uint32_t offset_type = reader->Read(); + uint32_t index = offset_type & 0xFFFF; + for (uint32_t n = 0; n < count - 1; n++, index++) { + uint32_t data = reader->Read(); + WriteRegister(index, data); + } + return true; +} + +bool CommandProcessor::ExecutePacketType3_IM_LOAD(RingbufferReader* reader, + uint32_t packet, + uint32_t count) { + // load sequencer instruction memory (pointer-based) + uint32_t addr_type = reader->Read(); + auto shader_type = static_cast(addr_type & 0x3); + uint32_t addr = addr_type & ~0x3; + uint32_t start_size = reader->Read(); + uint32_t start = start_size >> 16; + uint32_t size_dwords = start_size & 0xFFFF; // dwords + assert_true(start == 0); + trace_writer_.WriteMemoryRead(CpuToGpu(addr), size_dwords * 4); + LoadShader(shader_type, addr, memory_->TranslatePhysical(addr), + size_dwords); + return true; +} + +bool CommandProcessor::ExecutePacketType3_IM_LOAD_IMMEDIATE( + RingbufferReader* reader, uint32_t packet, uint32_t count) { + // load sequencer instruction memory (code embedded in packet) + uint32_t dword0 = reader->Read(); + uint32_t dword1 = reader->Read(); + auto shader_type = static_cast(dword0); + uint32_t start_size = dword1; + uint32_t start = start_size >> 16; + uint32_t size_dwords = start_size & 0xFFFF; // dwords + assert_true(start == 0); + reader->CheckRead(size_dwords); + LoadShader(shader_type, reader->ptr(), + memory_->TranslatePhysical(reader->ptr()), size_dwords); + reader->Advance(size_dwords); + return true; +} + +bool CommandProcessor::ExecutePacketType3_INVALIDATE_STATE( + RingbufferReader* reader, uint32_t packet, uint32_t count) { + // selective invalidation of state pointers + /*uint32_t mask =*/reader->Read(); + // driver_->InvalidateState(mask); + return true; +} + +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/command_processor.h b/src/xenia/gpu/command_processor.h new file mode 100644 index 000000000..f26f4749e --- /dev/null +++ b/src/xenia/gpu/command_processor.h @@ -0,0 +1,213 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2015 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_GPU_COMMAND_PROCESSOR_H_ +#define XENIA_GPU_COMMAND_PROCESSOR_H_ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "xenia/base/threading.h" +#include "xenia/gpu/register_file.h" +#include "xenia/gpu/trace_writer.h" +#include "xenia/gpu/xenos.h" +#include "xenia/kernel/xthread.h" +#include "xenia/memory.h" +#include "xenia/ui/graphics_context.h" + +namespace xe { +namespace gpu { + +class GraphicsSystem; + +struct SwapState { + // Lock must be held when changing data in this structure. + std::mutex mutex; + // Dimensions of the framebuffer textures. Should match window size. + uint32_t width = 0; + uint32_t height = 0; + // Current front buffer, being drawn to the screen. + uintptr_t front_buffer_texture = 0; + // Current back buffer, being updated by the CP. + uintptr_t back_buffer_texture = 0; + // Whether the back buffer is dirty and a swap is pending. + bool pending = false; +}; + +enum class SwapMode { + kNormal, + kIgnored, +}; + +class CommandProcessor { + public: + CommandProcessor(GraphicsSystem* graphics_system, + kernel::KernelState* kernel_state); + virtual ~CommandProcessor(); + + uint32_t counter() const { return counter_; } + void increment_counter() { counter_++; } + + virtual bool Initialize(std::unique_ptr context); + virtual void Shutdown(); + + void CallInThread(std::function fn); + + virtual void ClearCaches(); + + SwapState& swap_state() { return swap_state_; } + void set_swap_mode(SwapMode swap_mode) { swap_mode_ = swap_mode; } + void IssueSwap(uint32_t frontbuffer_ptr, uint32_t frontbuffer_width, + uint32_t frontbuffer_height); + + void set_swap_request_handler(std::function fn) { + swap_request_handler_ = fn; + } + + void RequestFrameTrace(const std::wstring& root_path); + void BeginTracing(const std::wstring& root_path); + void EndTracing(); + + void InitializeRingBuffer(uint32_t ptr, uint32_t page_count); + void EnableReadPointerWriteBack(uint32_t ptr, uint32_t block_size); + + void UpdateWritePointer(uint32_t value); + + void ExecutePacket(uint32_t ptr, uint32_t count); + + protected: + class RingbufferReader; + + struct IndexBufferInfo { + xenos::IndexFormat format = xenos::IndexFormat::kInt16; + xenos::Endian endianness = xenos::Endian::kUnspecified; + uint32_t count = 0; + uint32_t guest_base = 0; + size_t length = 0; + }; + + void WorkerThreadMain(); + virtual bool SetupContext() = 0; + virtual void ShutdownContext() = 0; + + void WriteRegister(uint32_t index, uint32_t value); + + virtual void MakeCoherent(); + virtual void PrepareForWait(); + virtual void ReturnFromWait(); + + virtual void PerformSwap(uint32_t frontbuffer_ptr, uint32_t frontbuffer_width, + uint32_t frontbuffer_height) = 0; + + void ExecutePrimaryBuffer(uint32_t start_index, uint32_t end_index); + void ExecuteIndirectBuffer(uint32_t ptr, uint32_t length); + bool ExecutePacket(RingbufferReader* reader); + bool ExecutePacketType0(RingbufferReader* reader, uint32_t packet); + bool ExecutePacketType1(RingbufferReader* reader, uint32_t packet); + bool ExecutePacketType2(RingbufferReader* reader, uint32_t packet); + bool ExecutePacketType3(RingbufferReader* reader, uint32_t packet); + bool ExecutePacketType3_ME_INIT(RingbufferReader* reader, uint32_t packet, + uint32_t count); + bool ExecutePacketType3_NOP(RingbufferReader* reader, uint32_t packet, + uint32_t count); + bool ExecutePacketType3_INTERRUPT(RingbufferReader* reader, uint32_t packet, + uint32_t count); + bool ExecutePacketType3_XE_SWAP(RingbufferReader* reader, uint32_t packet, + uint32_t count); + bool ExecutePacketType3_INDIRECT_BUFFER(RingbufferReader* reader, + uint32_t packet, uint32_t count); + bool ExecutePacketType3_WAIT_REG_MEM(RingbufferReader* reader, + uint32_t packet, uint32_t count); + bool ExecutePacketType3_REG_RMW(RingbufferReader* reader, uint32_t packet, + uint32_t count); + bool ExecutePacketType3_COND_WRITE(RingbufferReader* reader, uint32_t packet, + uint32_t count); + bool ExecutePacketType3_EVENT_WRITE(RingbufferReader* reader, uint32_t packet, + uint32_t count); + bool ExecutePacketType3_EVENT_WRITE_SHD(RingbufferReader* reader, + uint32_t packet, uint32_t count); + bool ExecutePacketType3_EVENT_WRITE_EXT(RingbufferReader* reader, + uint32_t packet, uint32_t count); + bool ExecutePacketType3_DRAW_INDX(RingbufferReader* reader, uint32_t packet, + uint32_t count); + bool ExecutePacketType3_DRAW_INDX_2(RingbufferReader* reader, uint32_t packet, + uint32_t count); + bool ExecutePacketType3_SET_CONSTANT(RingbufferReader* reader, + uint32_t packet, uint32_t count); + bool ExecutePacketType3_SET_CONSTANT2(RingbufferReader* reader, + uint32_t packet, uint32_t count); + bool ExecutePacketType3_LOAD_ALU_CONSTANT(RingbufferReader* reader, + uint32_t packet, uint32_t count); + bool ExecutePacketType3_SET_SHADER_CONSTANTS(RingbufferReader* reader, + uint32_t packet, uint32_t count); + bool ExecutePacketType3_IM_LOAD(RingbufferReader* reader, uint32_t packet, + uint32_t count); + bool ExecutePacketType3_IM_LOAD_IMMEDIATE(RingbufferReader* reader, + + uint32_t packet, uint32_t count); + bool ExecutePacketType3_INVALIDATE_STATE(RingbufferReader* reader, + uint32_t packet, uint32_t count); + + virtual bool LoadShader(ShaderType shader_type, uint32_t guest_address, + const uint32_t* host_address, + uint32_t dword_count) = 0; + + virtual bool IssueDraw(PrimitiveType prim_type, uint32_t index_count, + IndexBufferInfo* index_buffer_info) = 0; + virtual bool IssueCopy() = 0; + + Memory* memory_ = nullptr; + kernel::KernelState* kernel_state_ = nullptr; + GraphicsSystem* graphics_system_ = nullptr; + RegisterFile* register_file_ = nullptr; + + TraceWriter trace_writer_; + enum class TraceState { + kDisabled, + kStreaming, + kSingleFrame, + }; + TraceState trace_state_ = TraceState::kDisabled; + std::wstring trace_frame_path_; + + std::atomic worker_running_; + kernel::object_ref worker_thread_; + + std::unique_ptr context_; + SwapMode swap_mode_ = SwapMode::kNormal; + SwapState swap_state_; + std::function swap_request_handler_; + std::queue> pending_fns_; + + uint32_t counter_ = 0; + + uint32_t primary_buffer_ptr_ = 0; + uint32_t primary_buffer_size_ = 0; + + uint32_t read_ptr_index_ = 0; + uint32_t read_ptr_update_freq_ = 0; + uint32_t read_ptr_writeback_ptr_ = 0; + + std::unique_ptr write_ptr_index_event_; + std::atomic write_ptr_index_; + + uint64_t bin_select_ = 0xFFFFFFFFull; + uint64_t bin_mask_ = 0xFFFFFFFFull; +}; + +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_COMMAND_PROCESSOR_H_ diff --git a/src/xenia/gpu/gl4/command_processor.cc b/src/xenia/gpu/gl4/gl4_command_processor.cc similarity index 65% rename from src/xenia/gpu/gl4/command_processor.cc rename to src/xenia/gpu/gl4/gl4_command_processor.cc index 18c61ab25..c56905b7f 100644 --- a/src/xenia/gpu/gl4/command_processor.cc +++ b/src/xenia/gpu/gl4/gl4_command_processor.cc @@ -7,14 +7,13 @@ ****************************************************************************** */ -#include "xenia/gpu/gl4/command_processor.h" +#include "xenia/gpu/gl4/gl4_command_processor.h" #include #include "xenia/base/logging.h" #include "xenia/base/math.h" #include "xenia/base/profiling.h" -#include "xenia/emulator.h" #include "xenia/gpu/gl4/gl4_gpu_flags.h" #include "xenia/gpu/gl4/gl4_graphics_system.h" #include "xenia/gpu/gpu_flags.h" @@ -41,10 +40,10 @@ const GLuint kAnyTarget = UINT_MAX; const size_t kScratchBufferCapacity = 256 * 1024 * 1024; const size_t kScratchBufferAlignment = 256; -CommandProcessor::CachedPipeline::CachedPipeline() +GL4CommandProcessor::CachedPipeline::CachedPipeline() : vertex_program(0), fragment_program(0), handles({0}) {} -CommandProcessor::CachedPipeline::~CachedPipeline() { +GL4CommandProcessor::CachedPipeline::~CachedPipeline() { glDeleteProgramPipelines(1, &handles.default_pipeline); glDeleteProgramPipelines(1, &handles.point_list_pipeline); glDeleteProgramPipelines(1, &handles.rect_list_pipeline); @@ -52,108 +51,15 @@ CommandProcessor::CachedPipeline::~CachedPipeline() { glDeleteProgramPipelines(1, &handles.line_quad_list_pipeline); } -CommandProcessor::CommandProcessor(GL4GraphicsSystem* graphics_system) - : memory_(graphics_system->memory()), - graphics_system_(graphics_system), - register_file_(graphics_system_->register_file()), - trace_writer_(graphics_system->memory()->physical_membase()), - trace_state_(TraceState::kDisabled), - worker_running_(true), - swap_mode_(SwapMode::kNormal), - counter_(0), - primary_buffer_ptr_(0), - primary_buffer_size_(0), - read_ptr_index_(0), - read_ptr_update_freq_(0), - read_ptr_writeback_ptr_(0), - write_ptr_index_event_(xe::threading::Event::CreateAutoResetEvent(false)), - write_ptr_index_(0), - bin_select_(0xFFFFFFFFull), - bin_mask_(0xFFFFFFFFull), - active_vertex_shader_(nullptr), - active_pixel_shader_(nullptr), - active_framebuffer_(nullptr), - last_framebuffer_texture_(0), - point_list_geometry_program_(0), - rect_list_geometry_program_(0), - quad_list_geometry_program_(0), - draw_index_count_(0), +GL4CommandProcessor::GL4CommandProcessor(GL4GraphicsSystem* graphics_system, + kernel::KernelState* kernel_state) + : CommandProcessor(graphics_system, kernel_state), draw_batcher_(graphics_system_->register_file()), scratch_buffer_(kScratchBufferCapacity, kScratchBufferAlignment) {} -CommandProcessor::~CommandProcessor() = default; +GL4CommandProcessor::~GL4CommandProcessor() = default; -bool CommandProcessor::Initialize( - std::unique_ptr context) { - context_ = std::move(context); - - worker_running_ = true; - worker_thread_ = kernel::object_ref( - new kernel::XHostThread(graphics_system_->emulator()->kernel_state(), - 128 * 1024, 0, [this]() { - WorkerThreadMain(); - return 0; - })); - worker_thread_->set_name("GL4 Worker"); - worker_thread_->Create(); - - return true; -} - -void CommandProcessor::Shutdown() { - EndTracing(); - - worker_running_ = false; - write_ptr_index_event_->Set(); - worker_thread_->Wait(0, 0, 0, nullptr); - worker_thread_.reset(); -} - -void CommandProcessor::RequestFrameTrace(const std::wstring& root_path) { - if (trace_state_ == TraceState::kStreaming) { - XELOGE("Streaming trace; cannot also trace frame."); - return; - } - if (trace_state_ == TraceState::kSingleFrame) { - XELOGE("Frame trace already pending; ignoring."); - return; - } - trace_state_ = TraceState::kSingleFrame; - trace_frame_path_ = root_path; -} - -void CommandProcessor::BeginTracing(const std::wstring& root_path) { - if (trace_state_ == TraceState::kStreaming) { - XELOGE("Streaming already active; ignoring request."); - return; - } - if (trace_state_ == TraceState::kSingleFrame) { - XELOGE("Frame trace pending; ignoring streaming request."); - return; - } - std::wstring path = root_path + L"stream"; - trace_state_ = TraceState::kStreaming; - trace_writer_.Open(path); -} - -void CommandProcessor::EndTracing() { - if (!trace_writer_.is_open()) { - return; - } - assert_true(trace_state_ == TraceState::kStreaming); - trace_writer_.Close(); -} - -void CommandProcessor::CallInThread(std::function fn) { - if (pending_fns_.empty() && - kernel::XThread::IsInThread(worker_thread_.get())) { - fn(); - } else { - pending_fns_.push(std::move(fn)); - } -} - -void CommandProcessor::ClearCaches() { +void GL4CommandProcessor::ClearCaches() { texture_cache()->Clear(); for (auto& cached_framebuffer : cached_framebuffers_) { @@ -170,63 +76,11 @@ void CommandProcessor::ClearCaches() { glDeleteTextures(1, &cached_depth_render_target.texture); } cached_depth_render_targets_.clear(); + + CommandProcessor::ClearCaches(); } -void CommandProcessor::WorkerThreadMain() { - context_->MakeCurrent(); - if (!SetupGL()) { - xe::FatalError("Unable to setup command processor GL state"); - return; - } - - while (worker_running_) { - while (!pending_fns_.empty()) { - auto fn = std::move(pending_fns_.front()); - pending_fns_.pop(); - fn(); - } - - uint32_t write_ptr_index = write_ptr_index_.load(); - if (write_ptr_index == 0xBAADF00D || read_ptr_index_ == write_ptr_index) { - SCOPE_profile_cpu_i("gpu", "xe::gpu::gl4::CommandProcessor::Stall"); - // We've run out of commands to execute. - // We spin here waiting for new ones, as the overhead of waiting on our - // event is too high. - PrepareForWait(); - do { - // TODO(benvanik): if we go longer than Nms, switch to waiting? - // It'll keep us from burning power. - // const int wait_time_ms = 5; - // xe::threading::Wait(write_ptr_index_event_.get(), true, - // std::chrono::milliseconds(wait_time_ms)); - xe::threading::MaybeYield(); - write_ptr_index = write_ptr_index_.load(); - } while (worker_running_ && pending_fns_.empty() && - (write_ptr_index == 0xBAADF00D || - read_ptr_index_ == write_ptr_index)); - ReturnFromWait(); - if (!worker_running_ || !pending_fns_.empty()) { - continue; - } - } - assert_true(read_ptr_index_ != write_ptr_index); - - // Execute. Note that we handle wraparound transparently. - ExecutePrimaryBuffer(read_ptr_index_, write_ptr_index); - read_ptr_index_ = write_ptr_index; - - // TODO(benvanik): use reader->Read_update_freq_ and only issue after moving - // that many indices. - if (read_ptr_writeback_ptr_) { - xe::store_and_swap( - memory_->TranslatePhysical(read_ptr_writeback_ptr_), read_ptr_index_); - } - } - - ShutdownGL(); -} - -bool CommandProcessor::SetupGL() { +bool GL4CommandProcessor::SetupContext() { // Circular buffer holding scratch vertex/index data. if (!scratch_buffer_.Initialize()) { XELOGE("Unable to initialize scratch buffer"); @@ -432,7 +286,7 @@ bool CommandProcessor::SetupGL() { return true; } -GLuint CommandProcessor::CreateGeometryProgram(const std::string& source) { +GLuint GL4CommandProcessor::CreateGeometryProgram(const std::string& source) { auto source_str = source.c_str(); GLuint program = glCreateShaderProgramv(GL_GEOMETRY_SHADER, 1, &source_str); @@ -454,7 +308,7 @@ GLuint CommandProcessor::CreateGeometryProgram(const std::string& source) { return program; } -void CommandProcessor::ShutdownGL() { +void GL4CommandProcessor::ShutdownContext() { glDeleteProgram(point_list_geometry_program_); glDeleteProgram(rect_list_geometry_program_); glDeleteProgram(quad_list_geometry_program_); @@ -470,95 +324,21 @@ void CommandProcessor::ShutdownGL() { context_.reset(); } -void CommandProcessor::InitializeRingBuffer(uint32_t ptr, uint32_t page_count) { - primary_buffer_ptr_ = ptr; - // Not sure this is correct, but it's a way to take the page_count back to - // the number of bytes allocated by the physical alloc. - uint32_t original_size = 1 << (0x1C - page_count - 1); - primary_buffer_size_ = original_size; -} - -void CommandProcessor::EnableReadPointerWriteBack(uint32_t ptr, - uint32_t block_size) { - // CP_RB_RPTR_ADDR Ring Buffer Read Pointer Address 0x70C - // ptr = RB_RPTR_ADDR, pointer to write back the address to. - read_ptr_writeback_ptr_ = ptr; - // CP_RB_CNTL Ring Buffer Control 0x704 - // block_size = RB_BLKSZ, number of quadwords read between updates of the - // read pointer. - read_ptr_update_freq_ = - static_cast(pow(2.0, static_cast(block_size)) / 4); -} - -void CommandProcessor::UpdateWritePointer(uint32_t value) { - write_ptr_index_ = value; - write_ptr_index_event_->Set(); -} - -void CommandProcessor::WriteRegister(uint32_t index, uint32_t value) { - RegisterFile* regs = register_file_; - if (index >= RegisterFile::kRegisterCount) { - XELOGW("CommandProcessor::WriteRegister index out of bounds: %d", index); - return; - } - - regs->values[index].u32 = value; - - // If this is a COHER register, set the dirty flag. - // This will block the command processor the next time it WAIT_MEM_REGs and - // allow us to synchronize the memory. - if (index == XE_GPU_REG_COHER_STATUS_HOST) { - regs->values[index].u32 |= 0x80000000ul; - } - - // Scratch register writeback. - if (index >= XE_GPU_REG_SCRATCH_REG0 && index <= XE_GPU_REG_SCRATCH_REG7) { - uint32_t scratch_reg = index - XE_GPU_REG_SCRATCH_REG0; - if ((1 << scratch_reg) & regs->values[XE_GPU_REG_SCRATCH_UMSK].u32) { - // Enabled - write to address. - uint32_t scratch_addr = regs->values[XE_GPU_REG_SCRATCH_ADDR].u32; - uint32_t mem_addr = scratch_addr + (scratch_reg * 4); - xe::store_and_swap(memory_->TranslatePhysical(mem_addr), value); - } - } -} - -void CommandProcessor::MakeCoherent() { - SCOPE_profile_cpu_f("gpu"); - - // Status host often has 0x01000000 or 0x03000000. - // This is likely toggling VC (vertex cache) or TC (texture cache). - // Or, it also has a direction in here maybe - there is probably - // some way to check for dest coherency (what all the COHER_DEST_BASE_* - // registers are for). - // Best docs I've found on this are here: - // http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2013/10/R6xx_R7xx_3D.pdf - // http://cgit.freedesktop.org/xorg/driver/xf86-video-radeonhd/tree/src/r6xx_accel.c?id=3f8b6eccd9dba116cc4801e7f80ce21a879c67d2#n454 - +void GL4CommandProcessor::MakeCoherent() { RegisterFile* regs = register_file_; auto status_host = regs->values[XE_GPU_REG_COHER_STATUS_HOST].u32; - // auto base_host = regs->values[XE_GPU_REG_COHER_BASE_HOST].u32; - // auto size_host = regs->values[XE_GPU_REG_COHER_SIZE_HOST].u32; - if (!(status_host & 0x80000000ul)) { - return; + CommandProcessor::MakeCoherent(); + + if (status_host & 0x80000000ul) { + scratch_buffer_.ClearCache(); } - - // TODO(benvanik): notify resource cache of base->size and type. - // XELOGD("Make %.8X -> %.8X (%db) coherent", base_host, base_host + - // size_host, size_host); - - // Mark coherent. - status_host &= ~0x80000000ul; - regs->values[XE_GPU_REG_COHER_STATUS_HOST].u32 = status_host; - - scratch_buffer_.ClearCache(); } -void CommandProcessor::PrepareForWait() { +void GL4CommandProcessor::PrepareForWait() { SCOPE_profile_cpu_f("gpu"); - trace_writer_.Flush(); + CommandProcessor::PrepareForWait(); // TODO(benvanik): fences and fancy stuff. We should figure out a way to // make interrupt callbacks from the GPU so that we don't have to do a full @@ -571,43 +351,19 @@ void CommandProcessor::PrepareForWait() { } } -void CommandProcessor::ReturnFromWait() { +void GL4CommandProcessor::ReturnFromWait() { if (FLAGS_thread_safe_gl) { context_->MakeCurrent(); } + + CommandProcessor::ReturnFromWait(); } -void CommandProcessor::IssueSwap(uint32_t frontbuffer_ptr, - uint32_t frontbuffer_width, - uint32_t frontbuffer_height) { - SCOPE_profile_cpu_f("gpu"); - if (!swap_request_handler_) { - return; - } - - // If there was a swap pending we drop it on the floor. - // This prevents the display from pulling the backbuffer out from under us. - // If we skip a lot then we may need to buffer more, but as the display - // thread should be fairly idle that shouldn't happen. - if (!FLAGS_vsync) { - std::lock_guard lock(swap_state_.mutex); - if (swap_state_.pending) { - swap_state_.pending = false; - // TODO(benvanik): frame skip counter. - XELOGW("Skipped frame!"); - } - } else { - // Spin until no more pending swap. - while (true) { - { - std::lock_guard lock(swap_state_.mutex); - if (!swap_state_.pending) { - break; - } - } - xe::threading::MaybeYield(); - } - } +void GL4CommandProcessor::PerformSwap(uint32_t frontbuffer_ptr, + uint32_t frontbuffer_width, + uint32_t frontbuffer_height) { + // Ensure we issue any pending draws. + draw_batcher_.Flush(DrawBatcher::FlushMode::kMakeCoherent); // One-time initialization. // TODO(benvanik): move someplace more sane? @@ -615,12 +371,16 @@ void CommandProcessor::IssueSwap(uint32_t frontbuffer_ptr, std::lock_guard lock(swap_state_.mutex); swap_state_.width = frontbuffer_width; swap_state_.height = frontbuffer_height; - glCreateTextures(GL_TEXTURE_2D, 1, &swap_state_.front_buffer_texture); - glCreateTextures(GL_TEXTURE_2D, 1, &swap_state_.back_buffer_texture); - glTextureStorage2D(swap_state_.front_buffer_texture, 1, GL_RGBA8, - swap_state_.width, swap_state_.height); - glTextureStorage2D(swap_state_.back_buffer_texture, 1, GL_RGBA8, - swap_state_.width, swap_state_.height); + GLuint front_buffer_texture; + GLuint back_buffer_texture; + glCreateTextures(GL_TEXTURE_2D, 1, &front_buffer_texture); + glCreateTextures(GL_TEXTURE_2D, 1, &back_buffer_texture); + swap_state_.front_buffer_texture = front_buffer_texture; + swap_state_.back_buffer_texture = back_buffer_texture; + glTextureStorage2D(front_buffer_texture, 1, GL_RGBA8, swap_state_.width, + swap_state_.height); + glTextureStorage2D(back_buffer_texture, 1, GL_RGBA8, swap_state_.width, + swap_state_.height); } // Lookup the framebuffer in the recently-resolved list. @@ -641,8 +401,8 @@ void CommandProcessor::IssueSwap(uint32_t frontbuffer_ptr, reinterpret_cast(context_.get()) ->blitter() ->CopyColorTexture2D(framebuffer_texture, src_rect, - swap_state_.back_buffer_texture, dest_rect, - GL_LINEAR); + static_cast(swap_state_.back_buffer_texture), + dest_rect, GL_LINEAR); if (FLAGS_draw_all_framebuffers) { int32_t offsetx = (1280 - (1280 / 5)); @@ -662,8 +422,10 @@ void CommandProcessor::IssueSwap(uint32_t frontbuffer_ptr, dest_rect = {offsetx, offsety, 1280 / 5, 720 / 5}; reinterpret_cast(context_.get()) ->blitter() - ->CopyColorTexture2D(tex, src_rect, swap_state_.back_buffer_texture, - dest_rect, GL_LINEAR); + ->CopyColorTexture2D( + tex, src_rect, + static_cast(swap_state_.back_buffer_texture), dest_rect, + GL_LINEAR); offsety += 720 / 5; } @@ -684,8 +446,10 @@ void CommandProcessor::IssueSwap(uint32_t frontbuffer_ptr, 720 / 5}; reinterpret_cast(context_.get()) ->blitter() - ->CopyColorTexture2D(tex, src_rect, swap_state_.back_buffer_texture, - dest_rect, GL_LINEAR); + ->CopyColorTexture2D( + tex, src_rect, + static_cast(swap_state_.back_buffer_texture), dest_rect, + GL_LINEAR); doffsetx += 1280 / 5; } @@ -695,857 +459,14 @@ void CommandProcessor::IssueSwap(uint32_t frontbuffer_ptr, // TODO(benvanik): prevent this? fences? glFinish(); - { - // Set pending so that the display will swap the next time it can. - std::lock_guard lock(swap_state_.mutex); - swap_state_.pending = true; - } - - // Notify the display a swap is pending so that our changes are picked up. - // It does the actual front/back buffer swap. - swap_request_handler_(); - // Remove any dead textures, etc. texture_cache_.Scavenge(); } -class CommandProcessor::RingbufferReader { - public: - RingbufferReader(uint8_t* membase, uint32_t base_ptr, uint32_t ptr_mask, - uint32_t start_ptr, uint32_t end_ptr) - : membase_(membase), - base_ptr_(base_ptr), - ptr_mask_(ptr_mask), - end_ptr_(end_ptr), - ptr_(start_ptr), - offset_(0) {} - - uint32_t ptr() const { return ptr_; } - uint32_t offset() const { return offset_; } - bool can_read() const { return ptr_ != end_ptr_; } - - uint32_t Peek() { return xe::load_and_swap(membase_ + ptr_); } - - void CheckRead(uint32_t words) { - assert_true(ptr_ + words * sizeof(uint32_t) <= end_ptr_); - } - - uint32_t Read() { - uint32_t value = xe::load_and_swap(membase_ + ptr_); - Advance(1); - return value; - } - - void Advance(uint32_t words) { - offset_ += words; - ptr_ = ptr_ + words * sizeof(uint32_t); - if (ptr_mask_) { - ptr_ = base_ptr_ + - (((ptr_ - base_ptr_) / sizeof(uint32_t)) & ptr_mask_) * - sizeof(uint32_t); - } - } - - void Skip(uint32_t words) { Advance(words); } - - private: - uint8_t* membase_; - - uint32_t base_ptr_; - uint32_t ptr_mask_; - uint32_t end_ptr_; - uint32_t ptr_; - uint32_t offset_; -}; - -void CommandProcessor::ExecutePrimaryBuffer(uint32_t start_index, - uint32_t end_index) { - SCOPE_profile_cpu_f("gpu"); - - // Adjust pointer base. - uint32_t start_ptr = primary_buffer_ptr_ + start_index * sizeof(uint32_t); - start_ptr = (primary_buffer_ptr_ & ~0x1FFFFFFF) | (start_ptr & 0x1FFFFFFF); - uint32_t end_ptr = primary_buffer_ptr_ + end_index * sizeof(uint32_t); - end_ptr = (primary_buffer_ptr_ & ~0x1FFFFFFF) | (end_ptr & 0x1FFFFFFF); - - trace_writer_.WritePrimaryBufferStart(start_ptr, end_index - start_index); - - // Execute commands! - uint32_t ptr_mask = (primary_buffer_size_ / sizeof(uint32_t)) - 1; - RingbufferReader reader(memory_->physical_membase(), primary_buffer_ptr_, - ptr_mask, start_ptr, end_ptr); - while (reader.can_read()) { - ExecutePacket(&reader); - } - if (end_index > start_index) { - assert_true(reader.offset() == (end_index - start_index)); - } - - trace_writer_.WritePrimaryBufferEnd(); -} - -void CommandProcessor::ExecuteIndirectBuffer(uint32_t ptr, uint32_t length) { - SCOPE_profile_cpu_f("gpu"); - - trace_writer_.WriteIndirectBufferStart(ptr, length / sizeof(uint32_t)); - - // Execute commands! - uint32_t ptr_mask = 0; - RingbufferReader reader(memory_->physical_membase(), primary_buffer_ptr_, - ptr_mask, ptr, ptr + length * sizeof(uint32_t)); - while (reader.can_read()) { - ExecutePacket(&reader); - } - - trace_writer_.WriteIndirectBufferEnd(); -} - -void CommandProcessor::ExecutePacket(uint32_t ptr, uint32_t count) { - uint32_t ptr_mask = 0; - RingbufferReader reader(memory_->physical_membase(), primary_buffer_ptr_, - ptr_mask, ptr, ptr + count * sizeof(uint32_t)); - while (reader.can_read()) { - ExecutePacket(&reader); - } -} - -bool CommandProcessor::ExecutePacket(RingbufferReader* reader) { - const uint32_t packet = reader->Read(); - const uint32_t packet_type = packet >> 30; - if (packet == 0) { - trace_writer_.WritePacketStart(reader->ptr() - 4, 1); - trace_writer_.WritePacketEnd(); - return true; - } - - switch (packet_type) { - case 0x00: - return ExecutePacketType0(reader, packet); - case 0x01: - return ExecutePacketType1(reader, packet); - case 0x02: - return ExecutePacketType2(reader, packet); - case 0x03: - return ExecutePacketType3(reader, packet); - default: - assert_unhandled_case(packet_type); - return false; - } -} - -bool CommandProcessor::ExecutePacketType0(RingbufferReader* reader, - uint32_t packet) { - // Type-0 packet. - // Write count registers in sequence to the registers starting at - // (base_index << 2). - - uint32_t count = ((packet >> 16) & 0x3FFF) + 1; - trace_writer_.WritePacketStart(reader->ptr() - 4, 1 + count); - - uint32_t base_index = (packet & 0x7FFF); - uint32_t write_one_reg = (packet >> 15) & 0x1; - for (uint32_t m = 0; m < count; m++) { - uint32_t reg_data = reader->Read(); - uint32_t target_index = write_one_reg ? base_index : base_index + m; - WriteRegister(target_index, reg_data); - } - - trace_writer_.WritePacketEnd(); - return true; -} - -bool CommandProcessor::ExecutePacketType1(RingbufferReader* reader, - uint32_t packet) { - // Type-1 packet. - // Contains two registers of data. Type-0 should be more common. - trace_writer_.WritePacketStart(reader->ptr() - 4, 3); - uint32_t reg_index_1 = packet & 0x7FF; - uint32_t reg_index_2 = (packet >> 11) & 0x7FF; - uint32_t reg_data_1 = reader->Read(); - uint32_t reg_data_2 = reader->Read(); - WriteRegister(reg_index_1, reg_data_1); - WriteRegister(reg_index_2, reg_data_2); - trace_writer_.WritePacketEnd(); - return true; -} - -bool CommandProcessor::ExecutePacketType2(RingbufferReader* reader, - uint32_t packet) { - // Type-2 packet. - // No-op. Do nothing. - trace_writer_.WritePacketStart(reader->ptr() - 4, 1); - trace_writer_.WritePacketEnd(); - return true; -} - -bool CommandProcessor::ExecutePacketType3(RingbufferReader* reader, - uint32_t packet) { - // Type-3 packet. - uint32_t opcode = (packet >> 8) & 0x7F; - uint32_t count = ((packet >> 16) & 0x3FFF) + 1; - auto data_start_offset = reader->offset(); - - // To handle nesting behavior when tracing we special case indirect buffers. - if (opcode == PM4_INDIRECT_BUFFER) { - trace_writer_.WritePacketStart(reader->ptr() - 4, 2); - } else { - trace_writer_.WritePacketStart(reader->ptr() - 4, 1 + count); - } - - // & 1 == predicate - when set, we do bin check to see if we should execute - // the packet. Only type 3 packets are affected. - // We also skip predicated swaps, as they are never valid (probably?). - if (packet & 1) { - bool any_pass = (bin_select_ & bin_mask_) != 0; - if (!any_pass || opcode == PM4_XE_SWAP) { - reader->Skip(count); - trace_writer_.WritePacketEnd(); - return true; - } - } - - bool result = false; - switch (opcode) { - case PM4_ME_INIT: - result = ExecutePacketType3_ME_INIT(reader, packet, count); - break; - case PM4_NOP: - result = ExecutePacketType3_NOP(reader, packet, count); - break; - case PM4_INTERRUPT: - result = ExecutePacketType3_INTERRUPT(reader, packet, count); - break; - case PM4_XE_SWAP: - result = ExecutePacketType3_XE_SWAP(reader, packet, count); - break; - case PM4_INDIRECT_BUFFER: - result = ExecutePacketType3_INDIRECT_BUFFER(reader, packet, count); - break; - case PM4_WAIT_REG_MEM: - result = ExecutePacketType3_WAIT_REG_MEM(reader, packet, count); - break; - case PM4_REG_RMW: - result = ExecutePacketType3_REG_RMW(reader, packet, count); - break; - case PM4_COND_WRITE: - result = ExecutePacketType3_COND_WRITE(reader, packet, count); - break; - case PM4_EVENT_WRITE: - result = ExecutePacketType3_EVENT_WRITE(reader, packet, count); - break; - case PM4_EVENT_WRITE_SHD: - result = ExecutePacketType3_EVENT_WRITE_SHD(reader, packet, count); - break; - case PM4_EVENT_WRITE_EXT: - result = ExecutePacketType3_EVENT_WRITE_EXT(reader, packet, count); - break; - case PM4_DRAW_INDX: - result = ExecutePacketType3_DRAW_INDX(reader, packet, count); - break; - case PM4_DRAW_INDX_2: - result = ExecutePacketType3_DRAW_INDX_2(reader, packet, count); - break; - case PM4_SET_CONSTANT: - result = ExecutePacketType3_SET_CONSTANT(reader, packet, count); - break; - case PM4_SET_CONSTANT2: - result = ExecutePacketType3_SET_CONSTANT2(reader, packet, count); - break; - case PM4_LOAD_ALU_CONSTANT: - result = ExecutePacketType3_LOAD_ALU_CONSTANT(reader, packet, count); - break; - case PM4_SET_SHADER_CONSTANTS: - result = ExecutePacketType3_SET_SHADER_CONSTANTS(reader, packet, count); - break; - case PM4_IM_LOAD: - result = ExecutePacketType3_IM_LOAD(reader, packet, count); - break; - case PM4_IM_LOAD_IMMEDIATE: - result = ExecutePacketType3_IM_LOAD_IMMEDIATE(reader, packet, count); - break; - case PM4_INVALIDATE_STATE: - result = ExecutePacketType3_INVALIDATE_STATE(reader, packet, count); - break; - - case PM4_SET_BIN_MASK_LO: { - uint32_t value = reader->Read(); - bin_mask_ = (bin_mask_ & 0xFFFFFFFF00000000ull) | value; - result = true; - } break; - case PM4_SET_BIN_MASK_HI: { - uint32_t value = reader->Read(); - bin_mask_ = - (bin_mask_ & 0xFFFFFFFFull) | (static_cast(value) << 32); - result = true; - } break; - case PM4_SET_BIN_SELECT_LO: { - uint32_t value = reader->Read(); - bin_select_ = (bin_select_ & 0xFFFFFFFF00000000ull) | value; - result = true; - } break; - case PM4_SET_BIN_SELECT_HI: { - uint32_t value = reader->Read(); - bin_select_ = - (bin_select_ & 0xFFFFFFFFull) | (static_cast(value) << 32); - result = true; - } break; - - // Ignored packets - useful if breaking on the default handler below. - case 0x50: // 0xC0015000 usually 2 words, 0xFFFFFFFF / 0x00000000 - case 0x51: // 0xC0015100 usually 2 words, 0xFFFFFFFF / 0xFFFFFFFF - reader->Skip(count); - break; - - default: - reader->Skip(count); - break; - } - - trace_writer_.WritePacketEnd(); - assert_true(reader->offset() == data_start_offset + count); - return result; -} - -bool CommandProcessor::ExecutePacketType3_ME_INIT(RingbufferReader* reader, - uint32_t packet, - uint32_t count) { - // initialize CP's micro-engine - reader->Advance(count); - return true; -} - -bool CommandProcessor::ExecutePacketType3_NOP(RingbufferReader* reader, - uint32_t packet, uint32_t count) { - // skip N 32-bit words to get to the next packet - // No-op, ignore some data. - reader->Advance(count); - return true; -} - -bool CommandProcessor::ExecutePacketType3_INTERRUPT(RingbufferReader* reader, - uint32_t packet, - uint32_t count) { - SCOPE_profile_cpu_f("gpu"); - - // generate interrupt from the command stream - uint32_t cpu_mask = reader->Read(); - for (int n = 0; n < 6; n++) { - if (cpu_mask & (1 << n)) { - graphics_system_->DispatchInterruptCallback(1, n); - } - } - return true; -} - -bool CommandProcessor::ExecutePacketType3_XE_SWAP(RingbufferReader* reader, - uint32_t packet, - uint32_t count) { - SCOPE_profile_cpu_f("gpu"); - - XELOGI("XE_SWAP"); - - // Xenia-specific VdSwap hook. - // VdSwap will post this to tell us we need to swap the screen/fire an - // interrupt. - // 63 words here, but only the first has any data. - uint32_t magic = reader->Read(); - assert_true(magic == 'SWAP'); - - // TODO(benvanik): only swap frontbuffer ptr. - uint32_t frontbuffer_ptr = reader->Read(); - uint32_t frontbuffer_width = reader->Read(); - uint32_t frontbuffer_height = reader->Read(); - reader->Advance(count - 4); - - // Ensure we issue any pending draws. - draw_batcher_.Flush(DrawBatcher::FlushMode::kMakeCoherent); - - if (swap_mode_ == SwapMode::kNormal) { - IssueSwap(frontbuffer_ptr, frontbuffer_width, frontbuffer_height); - } - - if (trace_writer_.is_open()) { - trace_writer_.WriteEvent(EventType::kSwap); - trace_writer_.Flush(); - if (trace_state_ == TraceState::kSingleFrame) { - trace_state_ = TraceState::kDisabled; - trace_writer_.Close(); - } - } else if (trace_state_ == TraceState::kSingleFrame) { - // New trace request - we only start tracing at the beginning of a frame. - auto frame_number = L"frame_" + std::to_wstring(counter_); - auto path = trace_frame_path_ + frame_number; - trace_writer_.Open(path); - } - ++counter_; - return true; -} - -bool CommandProcessor::ExecutePacketType3_INDIRECT_BUFFER( - RingbufferReader* reader, uint32_t packet, uint32_t count) { - // indirect buffer dispatch - uint32_t list_ptr = CpuToGpu(reader->Read()); - uint32_t list_length = reader->Read(); - ExecuteIndirectBuffer(GpuToCpu(list_ptr), list_length); - return true; -} - -bool CommandProcessor::ExecutePacketType3_WAIT_REG_MEM(RingbufferReader* reader, - uint32_t packet, - uint32_t count) { - SCOPE_profile_cpu_f("gpu"); - - // wait until a register or memory location is a specific value - uint32_t wait_info = reader->Read(); - uint32_t poll_reg_addr = reader->Read(); - uint32_t ref = reader->Read(); - uint32_t mask = reader->Read(); - uint32_t wait = reader->Read(); - bool matched = false; - do { - uint32_t value; - if (wait_info & 0x10) { - // Memory. - auto endianness = static_cast(poll_reg_addr & 0x3); - poll_reg_addr &= ~0x3; - value = xe::load(memory_->TranslatePhysical(poll_reg_addr)); - value = GpuSwap(value, endianness); - trace_writer_.WriteMemoryRead(CpuToGpu(poll_reg_addr), 4); - } else { - // Register. - assert_true(poll_reg_addr < RegisterFile::kRegisterCount); - value = register_file_->values[poll_reg_addr].u32; - if (poll_reg_addr == XE_GPU_REG_COHER_STATUS_HOST) { - MakeCoherent(); - value = register_file_->values[poll_reg_addr].u32; - } - } - switch (wait_info & 0x7) { - case 0x0: // Never. - matched = false; - break; - case 0x1: // Less than reference. - matched = (value & mask) < ref; - break; - case 0x2: // Less than or equal to reference. - matched = (value & mask) <= ref; - break; - case 0x3: // Equal to reference. - matched = (value & mask) == ref; - break; - case 0x4: // Not equal to reference. - matched = (value & mask) != ref; - break; - case 0x5: // Greater than or equal to reference. - matched = (value & mask) >= ref; - break; - case 0x6: // Greater than reference. - matched = (value & mask) > ref; - break; - case 0x7: // Always - matched = true; - break; - } - if (!matched) { - // Wait. - if (wait >= 0x100) { - PrepareForWait(); - if (!FLAGS_vsync) { - // User wants it fast and dangerous. - xe::threading::MaybeYield(); - } else { - xe::threading::Sleep(std::chrono::milliseconds(wait / 0x100)); - } - xe::threading::SyncMemory(); - ReturnFromWait(); - } else { - xe::threading::MaybeYield(); - } - } - } while (!matched); - return true; -} - -bool CommandProcessor::ExecutePacketType3_REG_RMW(RingbufferReader* reader, - - uint32_t packet, - uint32_t count) { - // register read/modify/write - // ? (used during shader upload and edram setup) - uint32_t rmw_info = reader->Read(); - uint32_t and_mask = reader->Read(); - uint32_t or_mask = reader->Read(); - uint32_t value = register_file_->values[rmw_info & 0x1FFF].u32; - if ((rmw_info >> 30) & 0x1) { - // | reg - value |= register_file_->values[or_mask & 0x1FFF].u32; - } else { - // | imm - value |= or_mask; - } - if ((rmw_info >> 31) & 0x1) { - // & reg - value &= register_file_->values[and_mask & 0x1FFF].u32; - } else { - // & imm - value &= and_mask; - } - WriteRegister(rmw_info & 0x1FFF, value); - return true; -} - -bool CommandProcessor::ExecutePacketType3_COND_WRITE(RingbufferReader* reader, - uint32_t packet, - uint32_t count) { - // conditional write to memory or register - uint32_t wait_info = reader->Read(); - uint32_t poll_reg_addr = reader->Read(); - uint32_t ref = reader->Read(); - uint32_t mask = reader->Read(); - uint32_t write_reg_addr = reader->Read(); - uint32_t write_data = reader->Read(); - uint32_t value; - if (wait_info & 0x10) { - // Memory. - auto endianness = static_cast(poll_reg_addr & 0x3); - poll_reg_addr &= ~0x3; - trace_writer_.WriteMemoryRead(CpuToGpu(poll_reg_addr), 4); - value = xe::load(memory_->TranslatePhysical(poll_reg_addr)); - value = GpuSwap(value, endianness); - } else { - // Register. - assert_true(poll_reg_addr < RegisterFile::kRegisterCount); - value = register_file_->values[poll_reg_addr].u32; - } - bool matched = false; - switch (wait_info & 0x7) { - case 0x0: // Never. - matched = false; - break; - case 0x1: // Less than reference. - matched = (value & mask) < ref; - break; - case 0x2: // Less than or equal to reference. - matched = (value & mask) <= ref; - break; - case 0x3: // Equal to reference. - matched = (value & mask) == ref; - break; - case 0x4: // Not equal to reference. - matched = (value & mask) != ref; - break; - case 0x5: // Greater than or equal to reference. - matched = (value & mask) >= ref; - break; - case 0x6: // Greater than reference. - matched = (value & mask) > ref; - break; - case 0x7: // Always - matched = true; - break; - } - if (matched) { - // Write. - if (wait_info & 0x100) { - // Memory. - auto endianness = static_cast(write_reg_addr & 0x3); - write_reg_addr &= ~0x3; - write_data = GpuSwap(write_data, endianness); - xe::store(memory_->TranslatePhysical(write_reg_addr), write_data); - trace_writer_.WriteMemoryWrite(CpuToGpu(write_reg_addr), 4); - } else { - // Register. - WriteRegister(write_reg_addr, write_data); - } - } - return true; -} - -bool CommandProcessor::ExecutePacketType3_EVENT_WRITE(RingbufferReader* reader, - uint32_t packet, - uint32_t count) { - // generate an event that creates a write to memory when completed - uint32_t initiator = reader->Read(); - // Writeback initiator. - WriteRegister(XE_GPU_REG_VGT_EVENT_INITIATOR, initiator & 0x3F); - if (count == 1) { - // Just an event flag? Where does this write? - } else { - // Write to an address. - assert_always(); - reader->Advance(count - 1); - } - return true; -} - -bool CommandProcessor::ExecutePacketType3_EVENT_WRITE_SHD( - RingbufferReader* reader, uint32_t packet, uint32_t count) { - // generate a VS|PS_done event - uint32_t initiator = reader->Read(); - uint32_t address = reader->Read(); - uint32_t value = reader->Read(); - // Writeback initiator. - WriteRegister(XE_GPU_REG_VGT_EVENT_INITIATOR, initiator & 0x3F); - uint32_t data_value; - if ((initiator >> 31) & 0x1) { - // Write counter (GPU vblank counter?). - data_value = counter_; - } else { - // Write value. - data_value = value; - } - auto endianness = static_cast(address & 0x3); - address &= ~0x3; - data_value = GpuSwap(data_value, endianness); - xe::store(memory_->TranslatePhysical(address), data_value); - trace_writer_.WriteMemoryWrite(CpuToGpu(address), 4); - return true; -} - -bool CommandProcessor::ExecutePacketType3_EVENT_WRITE_EXT( - RingbufferReader* reader, uint32_t packet, uint32_t count) { - // generate a screen extent event - uint32_t initiator = reader->Read(); - uint32_t address = reader->Read(); - // Writeback initiator. - WriteRegister(XE_GPU_REG_VGT_EVENT_INITIATOR, initiator & 0x3F); - auto endianness = static_cast(address & 0x3); - address &= ~0x3; - // Let us hope we can fake this. - uint16_t extents[] = { - 0 >> 3, // min x - 2560 >> 3, // max x - 0 >> 3, // min y - 2560 >> 3, // max y - 0, // min z - 1, // max z - }; - assert_true(endianness == xenos::Endian::k8in16); - xe::copy_and_swap_16_aligned( - reinterpret_cast(memory_->TranslatePhysical(address)), extents, - xe::countof(extents)); - trace_writer_.WriteMemoryWrite(CpuToGpu(address), sizeof(extents)); - return true; -} - -bool CommandProcessor::ExecutePacketType3_DRAW_INDX(RingbufferReader* reader, - uint32_t packet, - uint32_t count) { - // initiate fetch of index buffer and draw - // dword0 = viz query info - /*uint32_t dword0 =*/reader->Read(); - uint32_t dword1 = reader->Read(); - uint32_t index_count = dword1 >> 16; - auto prim_type = static_cast(dword1 & 0x3F); - uint32_t src_sel = (dword1 >> 6) & 0x3; - if (src_sel == 0x0) { - // Indexed draw. - index_buffer_info_.guest_base = reader->Read(); - uint32_t index_size = reader->Read(); - index_buffer_info_.endianness = static_cast(index_size >> 30); - index_size &= 0x00FFFFFF; - bool index_32bit = (dword1 >> 11) & 0x1; - index_buffer_info_.format = - index_32bit ? IndexFormat::kInt32 : IndexFormat::kInt16; - index_size *= index_32bit ? 4 : 2; - index_buffer_info_.length = index_size; - index_buffer_info_.count = index_count; - } else if (src_sel == 0x2) { - // Auto draw. - index_buffer_info_.guest_base = 0; - index_buffer_info_.length = 0; - } else { - // Unknown source select. - assert_always(); - } - draw_index_count_ = index_count; - - bool draw_valid = false; - if (src_sel == 0x0) { - // Indexed draw. - draw_valid = draw_batcher_.BeginDrawElements(prim_type, index_count, - index_buffer_info_.format); - } else if (src_sel == 0x2) { - // Auto draw. - draw_valid = draw_batcher_.BeginDrawArrays(prim_type, index_count); - } else { - // Unknown source select. - assert_always(); - } - if (!draw_valid) { - return false; - } - return IssueDraw(); -} - -bool CommandProcessor::ExecutePacketType3_DRAW_INDX_2(RingbufferReader* reader, - uint32_t packet, - uint32_t count) { - // draw using supplied indices in packet - uint32_t dword0 = reader->Read(); - uint32_t index_count = dword0 >> 16; - auto prim_type = static_cast(dword0 & 0x3F); - uint32_t src_sel = (dword0 >> 6) & 0x3; - assert_true(src_sel == 0x2); // 'SrcSel=AutoIndex' - // Index buffer unused as automatic. - // bool index_32bit = (dword0 >> 11) & 0x1; - // uint32_t indices_size = index_count * (index_32bit ? 4 : 2); - // uint32_t index_ptr = reader->ptr(); - index_buffer_info_.guest_base = 0; - index_buffer_info_.length = 0; - reader->Advance(count - 1); - draw_index_count_ = index_count; - bool draw_valid = draw_batcher_.BeginDrawArrays(prim_type, index_count); - if (!draw_valid) { - return false; - } - return IssueDraw(); -} - -bool CommandProcessor::ExecutePacketType3_SET_CONSTANT(RingbufferReader* reader, - uint32_t packet, - uint32_t count) { - // load constant into chip and to memory - // PM4_REG(reg) ((0x4 << 16) | (GSL_HAL_SUBBLOCK_OFFSET(reg))) - // reg - 0x2000 - uint32_t offset_type = reader->Read(); - uint32_t index = offset_type & 0x7FF; - uint32_t type = (offset_type >> 16) & 0xFF; - switch (type) { - case 0: // ALU - index += 0x4000; - break; - case 1: // FETCH - index += 0x4800; - break; - case 2: // BOOL - index += 0x4900; - break; - case 3: // LOOP - index += 0x4908; - break; - case 4: // REGISTERS - index += 0x2000; - break; - default: - assert_always(); - reader->Skip(count - 1); - return true; - } - for (uint32_t n = 0; n < count - 1; n++, index++) { - uint32_t data = reader->Read(); - WriteRegister(index, data); - } - return true; -} - -bool CommandProcessor::ExecutePacketType3_SET_CONSTANT2( - RingbufferReader* reader, uint32_t packet, uint32_t count) { - uint32_t offset_type = reader->Read(); - uint32_t index = offset_type & 0xFFFF; - for (uint32_t n = 0; n < count - 1; n++, index++) { - uint32_t data = reader->Read(); - WriteRegister(index, data); - } - return true; -} - -bool CommandProcessor::ExecutePacketType3_LOAD_ALU_CONSTANT( - RingbufferReader* reader, uint32_t packet, uint32_t count) { - // load constants from memory - uint32_t address = reader->Read(); - address &= 0x3FFFFFFF; - uint32_t offset_type = reader->Read(); - uint32_t index = offset_type & 0x7FF; - uint32_t size_dwords = reader->Read(); - size_dwords &= 0xFFF; - uint32_t type = (offset_type >> 16) & 0xFF; - switch (type) { - case 0: // ALU - index += 0x4000; - break; - case 1: // FETCH - index += 0x4800; - break; - case 2: // BOOL - index += 0x4900; - break; - case 3: // LOOP - index += 0x4908; - break; - case 4: // REGISTERS - index += 0x2000; - break; - default: - assert_always(); - return true; - } - trace_writer_.WriteMemoryRead(CpuToGpu(address), size_dwords * 4); - for (uint32_t n = 0; n < size_dwords; n++, index++) { - uint32_t data = xe::load_and_swap( - memory_->TranslatePhysical(address + n * 4)); - WriteRegister(index, data); - } - return true; -} - -bool CommandProcessor::ExecutePacketType3_SET_SHADER_CONSTANTS( - RingbufferReader* reader, uint32_t packet, uint32_t count) { - uint32_t offset_type = reader->Read(); - uint32_t index = offset_type & 0xFFFF; - for (uint32_t n = 0; n < count - 1; n++, index++) { - uint32_t data = reader->Read(); - WriteRegister(index, data); - } - return true; -} - -bool CommandProcessor::ExecutePacketType3_IM_LOAD(RingbufferReader* reader, - uint32_t packet, - uint32_t count) { - // load sequencer instruction memory (pointer-based) - uint32_t addr_type = reader->Read(); - auto shader_type = static_cast(addr_type & 0x3); - uint32_t addr = addr_type & ~0x3; - uint32_t start_size = reader->Read(); - uint32_t start = start_size >> 16; - uint32_t size_dwords = start_size & 0xFFFF; // dwords - assert_true(start == 0); - trace_writer_.WriteMemoryRead(CpuToGpu(addr), size_dwords * 4); - LoadShader(shader_type, addr, memory_->TranslatePhysical(addr), - size_dwords); - return true; -} - -bool CommandProcessor::ExecutePacketType3_IM_LOAD_IMMEDIATE( - RingbufferReader* reader, uint32_t packet, uint32_t count) { - // load sequencer instruction memory (code embedded in packet) - uint32_t dword0 = reader->Read(); - uint32_t dword1 = reader->Read(); - auto shader_type = static_cast(dword0); - uint32_t start_size = dword1; - uint32_t start = start_size >> 16; - uint32_t size_dwords = start_size & 0xFFFF; // dwords - assert_true(start == 0); - reader->CheckRead(size_dwords); - LoadShader(shader_type, reader->ptr(), - memory_->TranslatePhysical(reader->ptr()), size_dwords); - reader->Advance(size_dwords); - return true; -} - -bool CommandProcessor::ExecutePacketType3_INVALIDATE_STATE( - RingbufferReader* reader, uint32_t packet, uint32_t count) { - // selective invalidation of state pointers - /*uint32_t mask =*/reader->Read(); - // driver_->InvalidateState(mask); - return true; -} - -bool CommandProcessor::LoadShader(ShaderType shader_type, - uint32_t guest_address, - const uint32_t* host_address, - uint32_t dword_count) { +bool GL4CommandProcessor::LoadShader(ShaderType shader_type, + uint32_t guest_address, + const uint32_t* host_address, + uint32_t dword_count) { // Hash the input memory and lookup the shader. GL4Shader* shader_ptr = nullptr; uint64_t hash = XXH64(host_address, dword_count * sizeof(uint32_t), 0); @@ -1582,11 +503,24 @@ bool CommandProcessor::LoadShader(ShaderType shader_type, return true; } -bool CommandProcessor::IssueDraw() { +bool GL4CommandProcessor::IssueDraw(PrimitiveType prim_type, + uint32_t index_count, + IndexBufferInfo* index_buffer_info) { #if FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); #endif // FINE_GRAINED_DRAW_SCOPES + bool draw_valid; + if (index_buffer_info) { + draw_valid = draw_batcher_.BeginDrawElements(prim_type, index_count, + index_buffer_info->format); + } else { + draw_valid = draw_batcher_.BeginDrawArrays(prim_type, index_count); + } + if (!draw_valid) { + return false; + } + auto& regs = *register_file_; auto enable_mode = @@ -1632,7 +566,7 @@ bool CommandProcessor::IssueDraw() { CHECK_ISSUE_UPDATE_STATUS(status, mismatch, "Unable to prepare draw samplers"); - status = PopulateIndexBuffer(); + status = PopulateIndexBuffer(index_buffer_info); CHECK_ISSUE_UPDATE_STATUS(status, mismatch, "Unable to setup index buffer"); status = PopulateVertexBuffers(); CHECK_ISSUE_UPDATE_STATUS(status, mismatch, "Unable to setup vertex buffers"); @@ -1645,8 +579,8 @@ bool CommandProcessor::IssueDraw() { return true; } -bool CommandProcessor::SetShadowRegister(uint32_t* dest, - uint32_t register_name) { +bool GL4CommandProcessor::SetShadowRegister(uint32_t* dest, + uint32_t register_name) { uint32_t value = register_file_->values[register_name].u32; if (*dest == value) { return false; @@ -1655,7 +589,8 @@ bool CommandProcessor::SetShadowRegister(uint32_t* dest, return true; } -bool CommandProcessor::SetShadowRegister(float* dest, uint32_t register_name) { +bool GL4CommandProcessor::SetShadowRegister(float* dest, + uint32_t register_name) { float value = register_file_->values[register_name].f32; if (*dest == value) { return false; @@ -1664,7 +599,7 @@ bool CommandProcessor::SetShadowRegister(float* dest, uint32_t register_name) { return true; } -CommandProcessor::UpdateStatus CommandProcessor::UpdateShaders( +GL4CommandProcessor::UpdateStatus GL4CommandProcessor::UpdateShaders( PrimitiveType prim_type) { auto& regs = update_shaders_regs_; @@ -1815,7 +750,7 @@ CommandProcessor::UpdateStatus CommandProcessor::UpdateShaders( return UpdateStatus::kMismatch; } -CommandProcessor::UpdateStatus CommandProcessor::UpdateRenderTargets() { +GL4CommandProcessor::UpdateStatus GL4CommandProcessor::UpdateRenderTargets() { auto& regs = update_render_targets_regs_; bool dirty = false; @@ -1913,7 +848,7 @@ CommandProcessor::UpdateStatus CommandProcessor::UpdateRenderTargets() { return UpdateStatus::kMismatch; } -CommandProcessor::UpdateStatus CommandProcessor::UpdateState() { +GL4CommandProcessor::UpdateStatus GL4CommandProcessor::UpdateState() { bool mismatch = false; #define CHECK_UPDATE_STATUS(status, mismatch, error_message) \ @@ -1939,7 +874,7 @@ CommandProcessor::UpdateStatus CommandProcessor::UpdateState() { return mismatch ? UpdateStatus::kMismatch : UpdateStatus::kCompatible; } -CommandProcessor::UpdateStatus CommandProcessor::UpdateViewportState() { +GL4CommandProcessor::UpdateStatus GL4CommandProcessor::UpdateViewportState() { auto& regs = update_viewport_state_regs_; bool dirty = false; @@ -2097,7 +1032,7 @@ CommandProcessor::UpdateStatus CommandProcessor::UpdateViewportState() { return UpdateStatus::kMismatch; } -CommandProcessor::UpdateStatus CommandProcessor::UpdateRasterizerState() { +GL4CommandProcessor::UpdateStatus GL4CommandProcessor::UpdateRasterizerState() { auto& regs = update_rasterizer_state_regs_; bool dirty = false; @@ -2185,7 +1120,7 @@ CommandProcessor::UpdateStatus CommandProcessor::UpdateRasterizerState() { return UpdateStatus::kMismatch; } -CommandProcessor::UpdateStatus CommandProcessor::UpdateBlendState() { +GL4CommandProcessor::UpdateStatus GL4CommandProcessor::UpdateBlendState() { auto& reg_file = *register_file_; auto& regs = update_blend_state_regs_; @@ -2280,7 +1215,8 @@ CommandProcessor::UpdateStatus CommandProcessor::UpdateBlendState() { return UpdateStatus::kMismatch; } -CommandProcessor::UpdateStatus CommandProcessor::UpdateDepthStencilState() { +GL4CommandProcessor::UpdateStatus +GL4CommandProcessor::UpdateDepthStencilState() { auto& regs = update_depth_stencil_state_regs_; bool dirty = false; @@ -2377,13 +1313,14 @@ CommandProcessor::UpdateStatus CommandProcessor::UpdateDepthStencilState() { return UpdateStatus::kMismatch; } -CommandProcessor::UpdateStatus CommandProcessor::PopulateIndexBuffer() { +GL4CommandProcessor::UpdateStatus GL4CommandProcessor::PopulateIndexBuffer( + IndexBufferInfo* index_buffer_info) { auto& regs = *register_file_; - auto& info = index_buffer_info_; - if (!info.guest_base) { + if (!index_buffer_info || !index_buffer_info->guest_base) { // No index buffer or auto draw. return UpdateStatus::kCompatible; } + auto& info = *index_buffer_info; #if FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); @@ -2425,7 +1362,7 @@ CommandProcessor::UpdateStatus CommandProcessor::PopulateIndexBuffer() { return UpdateStatus::kCompatible; } -CommandProcessor::UpdateStatus CommandProcessor::PopulateVertexBuffers() { +GL4CommandProcessor::UpdateStatus GL4CommandProcessor::PopulateVertexBuffers() { #if FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); #endif // FINE_GRAINED_DRAW_SCOPES @@ -2488,7 +1425,7 @@ CommandProcessor::UpdateStatus CommandProcessor::PopulateVertexBuffers() { return UpdateStatus::kCompatible; } -CommandProcessor::UpdateStatus CommandProcessor::PopulateSamplers() { +GL4CommandProcessor::UpdateStatus GL4CommandProcessor::PopulateSamplers() { #if FINE_GRAINED_DRAW_SCOPES SCOPE_profile_cpu_f("gpu"); #endif // FINE_GRAINED_DRAW_SCOPES @@ -2534,7 +1471,7 @@ CommandProcessor::UpdateStatus CommandProcessor::PopulateSamplers() { return mismatch ? UpdateStatus::kMismatch : UpdateStatus::kCompatible; } -CommandProcessor::UpdateStatus CommandProcessor::PopulateSampler( +GL4CommandProcessor::UpdateStatus GL4CommandProcessor::PopulateSampler( const Shader::SamplerDesc& desc) { auto& regs = *register_file_; int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + desc.fetch_slot * 6; @@ -2583,7 +1520,7 @@ CommandProcessor::UpdateStatus CommandProcessor::PopulateSampler( return UpdateStatus::kCompatible; } -bool CommandProcessor::IssueCopy() { +bool GL4CommandProcessor::IssueCopy() { SCOPE_profile_cpu_f("gpu"); auto& regs = *register_file_; @@ -2968,10 +1905,9 @@ bool CommandProcessor::IssueCopy() { return true; } -GLuint CommandProcessor::GetColorRenderTarget(uint32_t pitch, - MsaaSamples samples, - uint32_t base, - ColorRenderTargetFormat format) { +GLuint GL4CommandProcessor::GetColorRenderTarget( + uint32_t pitch, MsaaSamples samples, uint32_t base, + ColorRenderTargetFormat format) { // Because we don't know the height of anything, we allocate at full res. // At 2560x2560, it's impossible for EDRAM to fit anymore. uint32_t width = 2560; @@ -3039,10 +1975,9 @@ GLuint CommandProcessor::GetColorRenderTarget(uint32_t pitch, return cached->texture; } -GLuint CommandProcessor::GetDepthRenderTarget(uint32_t pitch, - MsaaSamples samples, - uint32_t base, - DepthRenderTargetFormat format) { +GLuint GL4CommandProcessor::GetDepthRenderTarget( + uint32_t pitch, MsaaSamples samples, uint32_t base, + DepthRenderTargetFormat format) { uint32_t width = 2560; uint32_t height = 2560; @@ -3080,7 +2015,7 @@ GLuint CommandProcessor::GetDepthRenderTarget(uint32_t pitch, return cached->texture; } -CommandProcessor::CachedFramebuffer* CommandProcessor::GetFramebuffer( +GL4CommandProcessor::CachedFramebuffer* GL4CommandProcessor::GetFramebuffer( GLuint color_targets[4], GLuint depth_target) { for (auto it = cached_framebuffers_.begin(); it != cached_framebuffers_.end(); ++it) { diff --git a/src/xenia/gpu/gl4/command_processor.h b/src/xenia/gpu/gl4/gl4_command_processor.h similarity index 50% rename from src/xenia/gpu/gl4/command_processor.h rename to src/xenia/gpu/gl4/gl4_command_processor.h index 804b1069d..b18e0c200 100644 --- a/src/xenia/gpu/gl4/command_processor.h +++ b/src/xenia/gpu/gl4/gl4_command_processor.h @@ -21,82 +21,31 @@ #include #include "xenia/base/threading.h" +#include "xenia/gpu/command_processor.h" #include "xenia/gpu/gl4/draw_batcher.h" #include "xenia/gpu/gl4/gl4_shader.h" #include "xenia/gpu/gl4/gl4_shader_translator.h" #include "xenia/gpu/gl4/texture_cache.h" #include "xenia/gpu/register_file.h" -#include "xenia/gpu/tracing.h" #include "xenia/gpu/xenos.h" #include "xenia/kernel/xthread.h" #include "xenia/memory.h" #include "xenia/ui/gl/circular_buffer.h" #include "xenia/ui/gl/gl_context.h" -namespace xe { -namespace kernel { -class XHostThread; -} // namespace kernel -} // namespace xe - namespace xe { namespace gpu { namespace gl4 { class GL4GraphicsSystem; -struct SwapState { - // Lock must be held when changing data in this structure. - std::mutex mutex; - // Dimensions of the framebuffer textures. Should match window size. - uint32_t width = 0; - uint32_t height = 0; - // Current front buffer, being drawn to the screen. - GLuint front_buffer_texture = 0; - // Current back buffer, being updated by the CP. - GLuint back_buffer_texture = 0; - // Whether the back buffer is dirty and a swap is pending. - bool pending = false; -}; - -enum class SwapMode { - kNormal, - kIgnored, -}; - -class CommandProcessor { +class GL4CommandProcessor : public CommandProcessor { public: - explicit CommandProcessor(GL4GraphicsSystem* graphics_system); - ~CommandProcessor(); + GL4CommandProcessor(GL4GraphicsSystem* graphics_system, + kernel::KernelState* kernel_state); + ~GL4CommandProcessor() override; - uint32_t counter() const { return counter_; } - void increment_counter() { counter_++; } - - bool Initialize(std::unique_ptr context); - void Shutdown(); - void CallInThread(std::function fn); - - void ClearCaches(); - - SwapState& swap_state() { return swap_state_; } - void set_swap_mode(SwapMode swap_mode) { swap_mode_ = swap_mode; } - void IssueSwap(uint32_t frontbuffer_ptr, uint32_t frontbuffer_width, - uint32_t frontbuffer_height); - - void set_swap_request_handler(std::function fn) { - swap_request_handler_ = fn; - } - - void RequestFrameTrace(const std::wstring& root_path); - void BeginTracing(const std::wstring& root_path); - void EndTracing(); - - void InitializeRingBuffer(uint32_t ptr, uint32_t page_count); - void EnableReadPointerWriteBack(uint32_t ptr, uint32_t block_size); - - void UpdateWritePointer(uint32_t value); - - void ExecutePacket(uint32_t ptr, uint32_t count); + void ClearCaches() override; // HACK: for debugging; would be good to have this in a base type. TextureCache* texture_cache() { return &texture_cache_; } @@ -111,8 +60,6 @@ class CommandProcessor { xenos::DepthRenderTargetFormat format); private: - class RingbufferReader; - enum class UpdateStatus { kCompatible, kMismatch, @@ -153,69 +100,22 @@ class CommandProcessor { } handles; }; - void WorkerThreadMain(); - bool SetupGL(); - void ShutdownGL(); + bool SetupContext() override; + void ShutdownContext() override; GLuint CreateGeometryProgram(const std::string& source); - void WriteRegister(uint32_t index, uint32_t value); - void MakeCoherent(); - void PrepareForWait(); - void ReturnFromWait(); + void MakeCoherent() override; + void PrepareForWait() override; + void ReturnFromWait() override; - void ExecutePrimaryBuffer(uint32_t start_index, uint32_t end_index); - void ExecuteIndirectBuffer(uint32_t ptr, uint32_t length); - bool ExecutePacket(RingbufferReader* reader); - bool ExecutePacketType0(RingbufferReader* reader, uint32_t packet); - bool ExecutePacketType1(RingbufferReader* reader, uint32_t packet); - bool ExecutePacketType2(RingbufferReader* reader, uint32_t packet); - bool ExecutePacketType3(RingbufferReader* reader, uint32_t packet); - bool ExecutePacketType3_ME_INIT(RingbufferReader* reader, uint32_t packet, - uint32_t count); - bool ExecutePacketType3_NOP(RingbufferReader* reader, uint32_t packet, - uint32_t count); - bool ExecutePacketType3_INTERRUPT(RingbufferReader* reader, uint32_t packet, - uint32_t count); - bool ExecutePacketType3_XE_SWAP(RingbufferReader* reader, uint32_t packet, - uint32_t count); - bool ExecutePacketType3_INDIRECT_BUFFER(RingbufferReader* reader, - uint32_t packet, uint32_t count); - bool ExecutePacketType3_WAIT_REG_MEM(RingbufferReader* reader, - uint32_t packet, uint32_t count); - bool ExecutePacketType3_REG_RMW(RingbufferReader* reader, uint32_t packet, - uint32_t count); - bool ExecutePacketType3_COND_WRITE(RingbufferReader* reader, uint32_t packet, - uint32_t count); - bool ExecutePacketType3_EVENT_WRITE(RingbufferReader* reader, uint32_t packet, - uint32_t count); - bool ExecutePacketType3_EVENT_WRITE_SHD(RingbufferReader* reader, - uint32_t packet, uint32_t count); - bool ExecutePacketType3_EVENT_WRITE_EXT(RingbufferReader* reader, - uint32_t packet, uint32_t count); - bool ExecutePacketType3_DRAW_INDX(RingbufferReader* reader, uint32_t packet, - uint32_t count); - bool ExecutePacketType3_DRAW_INDX_2(RingbufferReader* reader, uint32_t packet, - uint32_t count); - bool ExecutePacketType3_SET_CONSTANT(RingbufferReader* reader, - uint32_t packet, uint32_t count); - bool ExecutePacketType3_SET_CONSTANT2(RingbufferReader* reader, - uint32_t packet, uint32_t count); - bool ExecutePacketType3_LOAD_ALU_CONSTANT(RingbufferReader* reader, - uint32_t packet, uint32_t count); - bool ExecutePacketType3_SET_SHADER_CONSTANTS(RingbufferReader* reader, - uint32_t packet, uint32_t count); - bool ExecutePacketType3_IM_LOAD(RingbufferReader* reader, uint32_t packet, - uint32_t count); - bool ExecutePacketType3_IM_LOAD_IMMEDIATE(RingbufferReader* reader, - - uint32_t packet, uint32_t count); - bool ExecutePacketType3_INVALIDATE_STATE(RingbufferReader* reader, - uint32_t packet, uint32_t count); + void PerformSwap(uint32_t frontbuffer_ptr, uint32_t frontbuffer_width, + uint32_t frontbuffer_height) override; bool LoadShader(ShaderType shader_type, uint32_t guest_address, - const uint32_t* host_address, uint32_t dword_count); + const uint32_t* host_address, uint32_t dword_count) override; - bool IssueDraw(); + bool IssueDraw(PrimitiveType prim_type, uint32_t index_count, + IndexBufferInfo* index_buffer_info) override; UpdateStatus UpdateShaders(PrimitiveType prim_type); UpdateStatus UpdateRenderTargets(); UpdateStatus UpdateState(); @@ -223,77 +123,32 @@ class CommandProcessor { UpdateStatus UpdateRasterizerState(); UpdateStatus UpdateBlendState(); UpdateStatus UpdateDepthStencilState(); - UpdateStatus PopulateIndexBuffer(); + UpdateStatus PopulateIndexBuffer(IndexBufferInfo* index_buffer_info); UpdateStatus PopulateVertexBuffers(); UpdateStatus PopulateSamplers(); UpdateStatus PopulateSampler(const Shader::SamplerDesc& desc); - bool IssueCopy(); + bool IssueCopy() override; CachedFramebuffer* GetFramebuffer(GLuint color_targets[4], GLuint depth_target); - Memory* memory_; - GL4GraphicsSystem* graphics_system_; - RegisterFile* register_file_; - - TraceWriter trace_writer_; - enum class TraceState { - kDisabled, - kStreaming, - kSingleFrame, - }; - TraceState trace_state_; - std::wstring trace_frame_path_; - - std::atomic worker_running_; - kernel::object_ref worker_thread_; - - std::unique_ptr context_; - SwapMode swap_mode_; - SwapState swap_state_; - std::function swap_request_handler_; - std::queue> pending_fns_; - - uint32_t counter_; - - uint32_t primary_buffer_ptr_; - uint32_t primary_buffer_size_; - - uint32_t read_ptr_index_; - uint32_t read_ptr_update_freq_; - uint32_t read_ptr_writeback_ptr_; - - std::unique_ptr write_ptr_index_event_; - std::atomic write_ptr_index_; - - uint64_t bin_select_; - uint64_t bin_mask_; - GL4ShaderTranslator shader_translator_; std::vector> all_shaders_; std::unordered_map shader_cache_; - GL4Shader* active_vertex_shader_; - GL4Shader* active_pixel_shader_; - CachedFramebuffer* active_framebuffer_; - GLuint last_framebuffer_texture_; + GL4Shader* active_vertex_shader_ = nullptr; + GL4Shader* active_pixel_shader_ = nullptr; + CachedFramebuffer* active_framebuffer_ = nullptr; + GLuint last_framebuffer_texture_ = 0; std::vector cached_framebuffers_; std::vector cached_color_render_targets_; std::vector cached_depth_render_targets_; std::vector> all_pipelines_; std::unordered_map cached_pipelines_; - GLuint point_list_geometry_program_; - GLuint rect_list_geometry_program_; - GLuint quad_list_geometry_program_; - GLuint line_quad_list_geometry_program_; - struct { - xenos::IndexFormat format; - xenos::Endian endianness; - uint32_t count; - uint32_t guest_base; - size_t length; - } index_buffer_info_; - uint32_t draw_index_count_; + GLuint point_list_geometry_program_ = 0; + GLuint rect_list_geometry_program_ = 0; + GLuint quad_list_geometry_program_ = 0; + GLuint line_quad_list_geometry_program_ = 0; TextureCache texture_cache_; diff --git a/src/xenia/gpu/gl4/gl4_graphics_system.cc b/src/xenia/gpu/gl4/gl4_graphics_system.cc index ae073e3a0..210c5bdad 100644 --- a/src/xenia/gpu/gl4/gl4_graphics_system.cc +++ b/src/xenia/gpu/gl4/gl4_graphics_system.cc @@ -12,41 +12,20 @@ #include #include -#include "xenia/base/clock.h" #include "xenia/base/logging.h" #include "xenia/base/profiling.h" -#include "xenia/base/threading.h" #include "xenia/cpu/processor.h" -#include "xenia/emulator.h" +#include "xenia/gpu/gl4/gl4_command_processor.h" #include "xenia/gpu/gl4/gl4_gpu_flags.h" #include "xenia/gpu/gpu_flags.h" -#include "xenia/gpu/tracing.h" #include "xenia/ui/window.h" namespace xe { namespace gpu { namespace gl4 { -void InitializeIfNeeded(); -void CleanupOnShutdown(); - -void InitializeIfNeeded() { - static bool has_initialized = false; - if (has_initialized) { - return; - } - has_initialized = true; - - // - - atexit(CleanupOnShutdown); -} - -void CleanupOnShutdown() {} - -std::unique_ptr Create(Emulator* emulator) { - InitializeIfNeeded(); - return std::make_unique(emulator); +std::unique_ptr Create() { + return std::make_unique(); } std::unique_ptr GL4GraphicsSystem::CreateContext( @@ -57,15 +36,14 @@ std::unique_ptr GL4GraphicsSystem::CreateContext( return xe::ui::gl::GLContext::Create(target_window); } -GL4GraphicsSystem::GL4GraphicsSystem(Emulator* emulator) - : GraphicsSystem(emulator), worker_running_(false) {} +GL4GraphicsSystem::GL4GraphicsSystem() : GraphicsSystem() {} GL4GraphicsSystem::~GL4GraphicsSystem() = default; X_STATUS GL4GraphicsSystem::Setup(cpu::Processor* processor, - ui::Loop* target_loop, + kernel::KernelState* kernel_state, ui::Window* target_window) { - auto result = GraphicsSystem::Setup(processor, target_loop, target_window); + auto result = GraphicsSystem::Setup(processor, kernel_state, target_window); if (result) { return result; } @@ -73,224 +51,14 @@ X_STATUS GL4GraphicsSystem::Setup(cpu::Processor* processor, display_context_ = reinterpret_cast(target_window->context()); - // Watch for paint requests to do our swap. - target_window->on_painting.AddListener( - [this](xe::ui::UIEvent* e) { Swap(e); }); - - // Create rendering control. - // This must happen on the UI thread. - std::unique_ptr processor_context; - target_loop_->PostSynchronous([&]() { - // Setup the GL context the command processor will do all its drawing in. - // It's shared with the display context so that we can resolve framebuffers - // from it. - processor_context = display_context_->CreateShared(); - processor_context->ClearCurrent(); - }); - if (!processor_context) { - xe::FatalError( - "Unable to initialize GL context. Xenia requires OpenGL 4.5. Ensure " - "you have the latest drivers for your GPU and that it supports OpenGL " - "4.5. See http://xenia.jp/faq/ for more information."); - return X_STATUS_UNSUCCESSFUL; - } - - // Create command processor. This will spin up a thread to process all - // incoming ringbuffer packets. - command_processor_ = std::make_unique(this); - if (!command_processor_->Initialize(std::move(processor_context))) { - XELOGE("Unable to initialize command processor"); - return X_STATUS_UNSUCCESSFUL; - } - command_processor_->set_swap_request_handler( - [this]() { target_window_->Invalidate(); }); - - // Let the processor know we want register access callbacks. - memory_->AddVirtualMappedRange( - 0x7FC80000, 0xFFFF0000, 0x0000FFFF, this, - reinterpret_cast(MMIOReadRegisterThunk), - reinterpret_cast(MMIOWriteRegisterThunk)); - - // 60hz vsync timer. - worker_running_ = true; - worker_thread_ = - kernel::object_ref(new kernel::XHostThread( - emulator()->kernel_state(), 128 * 1024, 0, [this]() { - uint64_t vsync_duration = FLAGS_vsync ? 16 : 1; - uint64_t last_frame_time = Clock::QueryGuestTickCount(); - while (worker_running_) { - uint64_t current_time = Clock::QueryGuestTickCount(); - uint64_t elapsed = (current_time - last_frame_time) / - (Clock::guest_tick_frequency() / 1000); - if (elapsed >= vsync_duration) { - MarkVblank(); - last_frame_time = current_time; - } - xe::threading::Sleep(std::chrono::milliseconds(1)); - } - return 0; - })); - // As we run vblank interrupts the debugger must be able to suspend us. - worker_thread_->set_can_debugger_suspend(true); - worker_thread_->set_name("GL4 Vsync"); - worker_thread_->Create(); - - if (FLAGS_trace_gpu_stream) { - BeginTracing(); - } - return X_STATUS_SUCCESS; } -void GL4GraphicsSystem::Shutdown() { - EndTracing(); +void GL4GraphicsSystem::Shutdown() { GraphicsSystem::Shutdown(); } - worker_running_ = false; - worker_thread_->Wait(0, 0, 0, nullptr); - worker_thread_.reset(); - - command_processor_->Shutdown(); - - // TODO(benvanik): remove mapped range. - - command_processor_.reset(); - - GraphicsSystem::Shutdown(); -} - -void GL4GraphicsSystem::InitializeRingBuffer(uint32_t ptr, - uint32_t page_count) { - command_processor_->InitializeRingBuffer(ptr, page_count); -} - -void GL4GraphicsSystem::EnableReadPointerWriteBack(uint32_t ptr, - uint32_t block_size) { - command_processor_->EnableReadPointerWriteBack(ptr, block_size); -} - -void GL4GraphicsSystem::RequestFrameTrace() { - command_processor_->RequestFrameTrace(xe::to_wstring(FLAGS_trace_gpu_prefix)); -} - -void GL4GraphicsSystem::BeginTracing() { - command_processor_->BeginTracing(xe::to_wstring(FLAGS_trace_gpu_prefix)); -} - -void GL4GraphicsSystem::EndTracing() { command_processor_->EndTracing(); } - -void GL4GraphicsSystem::PlayTrace(const uint8_t* trace_data, size_t trace_size, - TracePlaybackMode playback_mode) { - command_processor_->CallInThread([this, trace_data, trace_size, - playback_mode]() { - command_processor_->set_swap_mode(SwapMode::kIgnored); - - auto trace_ptr = trace_data; - bool pending_break = false; - const PacketStartCommand* pending_packet = nullptr; - while (trace_ptr < trace_data + trace_size) { - auto type = static_cast(xe::load(trace_ptr)); - switch (type) { - case TraceCommandType::kPrimaryBufferStart: { - auto cmd = - reinterpret_cast(trace_ptr); - // - trace_ptr += sizeof(*cmd) + cmd->count * 4; - break; - } - case TraceCommandType::kPrimaryBufferEnd: { - auto cmd = - reinterpret_cast(trace_ptr); - // - trace_ptr += sizeof(*cmd); - break; - } - case TraceCommandType::kIndirectBufferStart: { - auto cmd = - reinterpret_cast(trace_ptr); - // - trace_ptr += sizeof(*cmd) + cmd->count * 4; - break; - } - case TraceCommandType::kIndirectBufferEnd: { - auto cmd = - reinterpret_cast(trace_ptr); - // - trace_ptr += sizeof(*cmd); - break; - } - case TraceCommandType::kPacketStart: { - auto cmd = reinterpret_cast(trace_ptr); - trace_ptr += sizeof(*cmd); - std::memcpy(memory()->TranslatePhysical(cmd->base_ptr), trace_ptr, - cmd->count * 4); - trace_ptr += cmd->count * 4; - pending_packet = cmd; - break; - } - case TraceCommandType::kPacketEnd: { - auto cmd = reinterpret_cast(trace_ptr); - trace_ptr += sizeof(*cmd); - if (pending_packet) { - command_processor_->ExecutePacket(pending_packet->base_ptr, - pending_packet->count); - pending_packet = nullptr; - } - if (pending_break) { - return; - } - break; - } - case TraceCommandType::kMemoryRead: { - auto cmd = reinterpret_cast(trace_ptr); - trace_ptr += sizeof(*cmd); - std::memcpy(memory()->TranslatePhysical(cmd->base_ptr), trace_ptr, - cmd->length); - trace_ptr += cmd->length; - break; - } - case TraceCommandType::kMemoryWrite: { - auto cmd = reinterpret_cast(trace_ptr); - trace_ptr += sizeof(*cmd); - // ? - trace_ptr += cmd->length; - break; - } - case TraceCommandType::kEvent: { - auto cmd = reinterpret_cast(trace_ptr); - trace_ptr += sizeof(*cmd); - switch (cmd->event_type) { - case EventType::kSwap: { - if (playback_mode == TracePlaybackMode::kBreakOnSwap) { - pending_break = true; - } - break; - } - } - break; - } - } - } - - command_processor_->set_swap_mode(SwapMode::kNormal); - command_processor_->IssueSwap(0, 1280, 720); - }); -} - -void GL4GraphicsSystem::ClearCaches() { - command_processor_->CallInThread( - [&]() { command_processor_->ClearCaches(); }); -} - -void GL4GraphicsSystem::MarkVblank() { - SCOPE_profile_cpu_f("gpu"); - - // Increment vblank counter (so the game sees us making progress). - command_processor_->increment_counter(); - - // TODO(benvanik): we shouldn't need to do the dispatch here, but there's - // something wrong and the CP will block waiting for code that - // needs to be run in the interrupt. - DispatchInterruptCallback(0, 2); +std::unique_ptr GL4GraphicsSystem::CreateCommandProcessor() { + return std::unique_ptr( + new GL4CommandProcessor(this, kernel_state_)); } void GL4GraphicsSystem::Swap(xe::ui::UIEvent* e) { @@ -315,51 +83,12 @@ void GL4GraphicsSystem::Swap(xe::ui::UIEvent* e) { // Blit the frontbuffer. display_context_->blitter()->BlitTexture2D( - swap_state.front_buffer_texture, + static_cast(swap_state.front_buffer_texture), Rect2D(0, 0, swap_state.width, swap_state.height), Rect2D(0, 0, target_window_->width(), target_window_->height()), GL_LINEAR); } -uint32_t GL4GraphicsSystem::ReadRegister(uint32_t addr) { - uint32_t r = addr & 0xFFFF; - - switch (r) { - case 0x3C00: // ? - return 0x08100748; - case 0x3C04: // ? - return 0x0000200E; - case 0x6530: // Scanline? - return 0x000002D0; - case 0x6544: // ? vblank pending? - return 1; - case 0x6584: // Screen res - 1280x720 - return 0x050002D0; - } - - assert_true(r < RegisterFile::kRegisterCount); - return register_file_.values[r].u32; -} - -void GL4GraphicsSystem::WriteRegister(uint32_t addr, uint32_t value) { - uint32_t r = addr & 0xFFFF; - - switch (r) { - case 0x0714: // CP_RB_WPTR - command_processor_->UpdateWritePointer(value); - break; - case 0x6110: // ? swap related? - XELOGW("Unimplemented GPU register %.4X write: %.8X", r, value); - return; - default: - XELOGW("Unknown GPU register %.4X write: %.8X", r, value); - break; - } - - assert_true(r < RegisterFile::kRegisterCount); - register_file_.values[r].u32 = value; -} - } // namespace gl4 } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/gl4/gl4_graphics_system.h b/src/xenia/gpu/gl4/gl4_graphics_system.h index 473a7f431..cd712040d 100644 --- a/src/xenia/gpu/gl4/gl4_graphics_system.h +++ b/src/xenia/gpu/gl4/gl4_graphics_system.h @@ -12,10 +12,7 @@ #include -#include "xenia/gpu/gl4/command_processor.h" #include "xenia/gpu/graphics_system.h" -#include "xenia/gpu/register_file.h" -#include "xenia/kernel/xthread.h" #include "xenia/ui/gl/gl_context.h" namespace xe { @@ -24,53 +21,22 @@ namespace gl4 { class GL4GraphicsSystem : public GraphicsSystem { public: - explicit GL4GraphicsSystem(Emulator* emulator); + GL4GraphicsSystem(); ~GL4GraphicsSystem() override; std::unique_ptr CreateContext( ui::Window* target_window) override; - X_STATUS Setup(cpu::Processor* processor, ui::Loop* target_loop, + X_STATUS Setup(cpu::Processor* processor, kernel::KernelState* kernel_state, ui::Window* target_window) override; void Shutdown() override; - RegisterFile* register_file() { return ®ister_file_; } - CommandProcessor* command_processor() const { - return command_processor_.get(); - } - - void InitializeRingBuffer(uint32_t ptr, uint32_t page_count) override; - void EnableReadPointerWriteBack(uint32_t ptr, uint32_t block_size) override; - - void RequestFrameTrace() override; - void BeginTracing() override; - void EndTracing() override; - void PlayTrace(const uint8_t* trace_data, size_t trace_size, - TracePlaybackMode playback_mode) override; - void ClearCaches() override; - private: - void MarkVblank(); - void Swap(xe::ui::UIEvent* e); - uint32_t ReadRegister(uint32_t addr); - void WriteRegister(uint32_t addr, uint32_t value); + std::unique_ptr CreateCommandProcessor() override; - static uint32_t MMIOReadRegisterThunk(void* ppc_context, - GL4GraphicsSystem* gs, uint32_t addr) { - return gs->ReadRegister(addr); - } - static void MMIOWriteRegisterThunk(void* ppc_context, GL4GraphicsSystem* gs, - uint32_t addr, uint32_t value) { - gs->WriteRegister(addr, value); - } - - RegisterFile register_file_; - std::unique_ptr command_processor_; + void Swap(xe::ui::UIEvent* e) override; xe::ui::gl::GLContext* display_context_ = nullptr; - - std::atomic worker_running_; - kernel::object_ref worker_thread_; }; } // namespace gl4 diff --git a/src/xenia/gpu/gl4/trace_viewer_main.cc b/src/xenia/gpu/gl4/gl4_trace_viewer_main.cc similarity index 65% rename from src/xenia/gpu/gl4/trace_viewer_main.cc rename to src/xenia/gpu/gl4/gl4_trace_viewer_main.cc index 73874f00a..56ea3b062 100644 --- a/src/xenia/gpu/gl4/trace_viewer_main.cc +++ b/src/xenia/gpu/gl4/gl4_trace_viewer_main.cc @@ -8,6 +8,7 @@ */ #include + #include #include "third_party/imgui/imgui.h" @@ -19,16 +20,21 @@ #include "xenia/base/platform_win.h" #include "xenia/base/profiling.h" #include "xenia/emulator.h" +#include "xenia/gpu/command_processor.h" #include "xenia/gpu/graphics_system.h" +#include "xenia/gpu/packet_disassembler.h" #include "xenia/gpu/register_file.h" -#include "xenia/gpu/tracing.h" +#include "xenia/gpu/sampler_info.h" +#include "xenia/gpu/texture_info.h" +#include "xenia/gpu/trace_player.h" +#include "xenia/gpu/trace_protocol.h" #include "xenia/gpu/xenos.h" #include "xenia/ui/gl/gl_context.h" #include "xenia/ui/imgui_drawer.h" #include "xenia/ui/window.h" // HACK: until we have another impl, we just use gl4 directly. -#include "xenia/gpu/gl4/command_processor.h" +#include "xenia/gpu/gl4/gl4_command_processor.h" #include "xenia/gpu/gl4/gl4_graphics_system.h" #include "xenia/gpu/gl4/gl4_shader.h" @@ -37,810 +43,7 @@ DEFINE_string(target_trace_file, "", "Specifies the trace file to load."); namespace xe { namespace gpu { -enum class PacketCategory { - kGeneric, - kDraw, - kSwap, -}; -struct PacketTypeInfo { - PacketCategory category; - const char* name; -}; -struct PacketAction { - enum class Type { - kRegisterWrite, - kSetBinMask, - kSetBinSelect, - }; - Type type; - union { - struct { - uint32_t index; - RegisterFile::RegisterValue value; - } register_write; - struct { - uint64_t value; - } set_bin_mask; - struct { - uint64_t value; - } set_bin_select; - }; - static PacketAction RegisterWrite(uint32_t index, uint32_t value) { - PacketAction action; - action.type = Type::kRegisterWrite; - action.register_write.index = index; - action.register_write.value.u32 = value; - return action; - } - static PacketAction SetBinMask(uint64_t value) { - PacketAction action; - action.type = Type::kSetBinMask; - action.set_bin_mask.value = value; - return action; - } - static PacketAction SetBinSelect(uint64_t value) { - PacketAction action; - action.type = Type::kSetBinSelect; - action.set_bin_select.value = value; - return action; - } -}; -struct PacketInfo { - const PacketTypeInfo* type_info; - bool predicated; - uint32_t count; - std::vector actions; -}; -bool DisasmPacketType0(const uint8_t* base_ptr, uint32_t packet, - PacketInfo* out_info) { - static const PacketTypeInfo type_0_info = {PacketCategory::kGeneric, - "PM4_TYPE0"}; - out_info->type_info = &type_0_info; - - uint32_t count = ((packet >> 16) & 0x3FFF) + 1; - out_info->count = 1 + count; - auto ptr = base_ptr + 4; - - uint32_t base_index = (packet & 0x7FFF); - uint32_t write_one_reg = (packet >> 15) & 0x1; - for (uint32_t m = 0; m < count; m++) { - uint32_t reg_data = xe::load_and_swap(ptr); - uint32_t target_index = write_one_reg ? base_index : base_index + m; - out_info->actions.emplace_back( - PacketAction::RegisterWrite(target_index, reg_data)); - ptr += 4; - } - - return true; -} -bool DisasmPacketType1(const uint8_t* base_ptr, uint32_t packet, - PacketInfo* out_info) { - static const PacketTypeInfo type_1_info = {PacketCategory::kGeneric, - "PM4_TYPE1"}; - out_info->type_info = &type_1_info; - - out_info->count = 1 + 2; - auto ptr = base_ptr + 4; - - uint32_t reg_index_1 = packet & 0x7FF; - uint32_t reg_index_2 = (packet >> 11) & 0x7FF; - uint32_t reg_data_1 = xe::load_and_swap(ptr); - uint32_t reg_data_2 = xe::load_and_swap(ptr + 4); - out_info->actions.emplace_back( - PacketAction::RegisterWrite(reg_index_1, reg_data_1)); - out_info->actions.emplace_back( - PacketAction::RegisterWrite(reg_index_2, reg_data_2)); - - return true; -} -bool DisasmPacketType2(const uint8_t* base_ptr, uint32_t packet, - PacketInfo* out_info) { - static const PacketTypeInfo type_2_info = {PacketCategory::kGeneric, - "PM4_TYPE2"}; - out_info->type_info = &type_2_info; - - out_info->count = 1; - - return true; -} using namespace xe::gpu::xenos; -bool DisasmPacketType3(const uint8_t* base_ptr, uint32_t packet, - PacketInfo* out_info) { - static const PacketTypeInfo type_3_unknown_info = {PacketCategory::kGeneric, - "PM4_TYPE3_UNKNOWN"}; - out_info->type_info = &type_3_unknown_info; - - uint32_t opcode = (packet >> 8) & 0x7F; - uint32_t count = ((packet >> 16) & 0x3FFF) + 1; - out_info->count = 1 + count; - auto ptr = base_ptr + 4; - - if (packet & 1) { - out_info->predicated = true; - } - - bool result = true; - switch (opcode) { - case PM4_ME_INIT: { - // initialize CP's micro-engine - static const PacketTypeInfo op_info = {PacketCategory::kGeneric, - "PM4_ME_INIT"}; - out_info->type_info = &op_info; - break; - } - case PM4_NOP: { - // skip N 32-bit words to get to the next packet - // No-op, ignore some data. - static const PacketTypeInfo op_info = {PacketCategory::kGeneric, - "PM4_NOP"}; - out_info->type_info = &op_info; - break; - } - case PM4_INTERRUPT: { - // generate interrupt from the command stream - static const PacketTypeInfo op_info = {PacketCategory::kGeneric, - "PM4_INTERRUPT"}; - out_info->type_info = &op_info; - uint32_t cpu_mask = xe::load_and_swap(ptr + 0); - for (int n = 0; n < 6; n++) { - if (cpu_mask & (1 << n)) { - // graphics_system_->DispatchInterruptCallback(1, n); - } - } - break; - } - case PM4_XE_SWAP: { - // Xenia-specific VdSwap hook. - // VdSwap will post this to tell us we need to swap the screen/fire an - // interrupt. - // 63 words here, but only the first has any data. - static const PacketTypeInfo op_info = {PacketCategory::kSwap, - "PM4_XE_SWAP"}; - out_info->type_info = &op_info; - uint32_t frontbuffer_ptr = xe::load_and_swap(ptr + 0); - break; - } - case PM4_INDIRECT_BUFFER: { - // indirect buffer dispatch - static const PacketTypeInfo op_info = {PacketCategory::kGeneric, - "PM4_INDIRECT_BUFFER"}; - out_info->type_info = &op_info; - uint32_t list_ptr = xe::load_and_swap(ptr + 0); - uint32_t list_length = xe::load_and_swap(ptr + 4); - break; - } - case PM4_WAIT_REG_MEM: { - // wait until a register or memory location is a specific value - static const PacketTypeInfo op_info = {PacketCategory::kGeneric, - "PM4_WAIT_REG_MEM"}; - out_info->type_info = &op_info; - uint32_t wait_info = xe::load_and_swap(ptr + 0); - uint32_t poll_reg_addr = xe::load_and_swap(ptr + 4); - uint32_t ref = xe::load_and_swap(ptr + 8); - uint32_t mask = xe::load_and_swap(ptr + 12); - uint32_t wait = xe::load_and_swap(ptr + 16); - break; - } - case PM4_REG_RMW: { - // register read/modify/write - // ? (used during shader upload and edram setup) - static const PacketTypeInfo op_info = {PacketCategory::kGeneric, - "PM4_REG_RMW"}; - out_info->type_info = &op_info; - uint32_t rmw_info = xe::load_and_swap(ptr + 0); - uint32_t and_mask = xe::load_and_swap(ptr + 4); - uint32_t or_mask = xe::load_and_swap(ptr + 8); - break; - } - case PM4_COND_WRITE: { - // conditional write to memory or register - static const PacketTypeInfo op_info = {PacketCategory::kGeneric, - "PM4_COND_WRITE"}; - out_info->type_info = &op_info; - uint32_t wait_info = xe::load_and_swap(ptr + 0); - uint32_t poll_reg_addr = xe::load_and_swap(ptr + 4); - uint32_t ref = xe::load_and_swap(ptr + 8); - uint32_t mask = xe::load_and_swap(ptr + 12); - uint32_t write_reg_addr = xe::load_and_swap(ptr + 16); - uint32_t write_data = xe::load_and_swap(ptr + 20); - break; - } - case PM4_EVENT_WRITE: { - // generate an event that creates a write to memory when completed - static const PacketTypeInfo op_info = {PacketCategory::kGeneric, - "PM4_EVENT_WRITE"}; - out_info->type_info = &op_info; - uint32_t initiator = xe::load_and_swap(ptr + 0); - break; - } - case PM4_EVENT_WRITE_SHD: { - // generate a VS|PS_done event - static const PacketTypeInfo op_info = {PacketCategory::kGeneric, - "PM4_EVENT_WRITE_SHD"}; - out_info->type_info = &op_info; - uint32_t initiator = xe::load_and_swap(ptr + 0); - uint32_t address = xe::load_and_swap(ptr + 4); - uint32_t value = xe::load_and_swap(ptr + 8); - break; - } - case PM4_EVENT_WRITE_EXT: { - // generate a screen extent event - static const PacketTypeInfo op_info = {PacketCategory::kGeneric, - "PM4_EVENT_WRITE_EXT"}; - out_info->type_info = &op_info; - uint32_t unk0 = xe::load_and_swap(ptr + 0); - uint32_t unk1 = xe::load_and_swap(ptr + 4); - break; - } - case PM4_DRAW_INDX: { - // initiate fetch of index buffer and draw - // dword0 = viz query info - static const PacketTypeInfo op_info = {PacketCategory::kDraw, - "PM4_DRAW_INDX"}; - out_info->type_info = &op_info; - uint32_t dword0 = xe::load_and_swap(ptr + 0); - uint32_t dword1 = xe::load_and_swap(ptr + 4); - uint32_t index_count = dword1 >> 16; - auto prim_type = static_cast(dword1 & 0x3F); - uint32_t src_sel = (dword1 >> 6) & 0x3; - if (src_sel == 0x0) { - // Indexed draw. - uint32_t guest_base = xe::load_and_swap(ptr + 8); - uint32_t index_size = xe::load_and_swap(ptr + 12); - auto endianness = static_cast(index_size >> 30); - index_size &= 0x00FFFFFF; - bool index_32bit = (dword1 >> 11) & 0x1; - index_size *= index_32bit ? 4 : 2; - } else if (src_sel == 0x2) { - // Auto draw. - } else { - // Unknown source select. - assert_always(); - } - break; - } - case PM4_DRAW_INDX_2: { - // draw using supplied indices in packet - static const PacketTypeInfo op_info = {PacketCategory::kDraw, - "PM4_DRAW_INDX_2"}; - out_info->type_info = &op_info; - uint32_t dword0 = xe::load_and_swap(ptr + 0); - uint32_t index_count = dword0 >> 16; - auto prim_type = static_cast(dword0 & 0x3F); - uint32_t src_sel = (dword0 >> 6) & 0x3; - assert_true(src_sel == 0x2); // 'SrcSel=AutoIndex' - bool index_32bit = (dword0 >> 11) & 0x1; - uint32_t indices_size = index_count * (index_32bit ? 4 : 2); - auto index_ptr = ptr + 4; - break; - } - case PM4_SET_CONSTANT: { - // load constant into chip and to memory - // PM4_REG(reg) ((0x4 << 16) | (GSL_HAL_SUBBLOCK_OFFSET(reg))) - // reg - 0x2000 - static const PacketTypeInfo op_info = {PacketCategory::kGeneric, - "PM4_SET_CONSTANT"}; - out_info->type_info = &op_info; - uint32_t offset_type = xe::load_and_swap(ptr + 0); - uint32_t index = offset_type & 0x7FF; - uint32_t type = (offset_type >> 16) & 0xFF; - switch (type) { - case 0: // ALU - index += 0x4000; - break; - case 1: // FETCH - index += 0x4800; - break; - case 2: // BOOL - index += 0x4900; - break; - case 3: // LOOP - index += 0x4908; - break; - case 4: // REGISTERS - index += 0x2000; - break; - default: - assert_always(); - result = false; - break; - } - for (uint32_t n = 0; n < count - 1; n++, index++) { - uint32_t data = xe::load_and_swap(ptr + 4 + n * 4); - out_info->actions.emplace_back( - PacketAction::RegisterWrite(index, data)); - } - break; - } - case PM4_SET_CONSTANT2: { - static const PacketTypeInfo op_info = {PacketCategory::kGeneric, - "PM4_SET_CONSTANT2"}; - out_info->type_info = &op_info; - uint32_t offset_type = xe::load_and_swap(ptr + 0); - uint32_t index = offset_type & 0xFFFF; - for (uint32_t n = 0; n < count - 1; n++, index++) { - uint32_t data = xe::load_and_swap(ptr + 4 + n * 4); - out_info->actions.emplace_back( - PacketAction::RegisterWrite(index, data)); - } - return true; - break; - } - case PM4_LOAD_ALU_CONSTANT: { - // load constants from memory - static const PacketTypeInfo op_info = {PacketCategory::kGeneric, - "PM4_LOAD_ALU_CONSTANT"}; - out_info->type_info = &op_info; - uint32_t address = xe::load_and_swap(ptr + 0); - address &= 0x3FFFFFFF; - uint32_t offset_type = xe::load_and_swap(ptr + 4); - uint32_t index = offset_type & 0x7FF; - uint32_t size_dwords = xe::load_and_swap(ptr + 8); - size_dwords &= 0xFFF; - uint32_t type = (offset_type >> 16) & 0xFF; - switch (type) { - case 0: // ALU - index += 0x4000; - break; - case 1: // FETCH - index += 0x4800; - break; - case 2: // BOOL - index += 0x4900; - break; - case 3: // LOOP - index += 0x4908; - break; - case 4: // REGISTERS - index += 0x2000; - break; - default: - assert_always(); - return true; - } - for (uint32_t n = 0; n < size_dwords; n++, index++) { - // Hrm, ? - // xe::load_and_swap(membase_ + GpuToCpu(address + n * 4)); - uint32_t data = 0xDEADBEEF; - out_info->actions.emplace_back( - PacketAction::RegisterWrite(index, data)); - } - break; - } - case PM4_SET_SHADER_CONSTANTS: { - static const PacketTypeInfo op_info = {PacketCategory::kGeneric, - "PM4_SET_SHADER_CONSTANTS"}; - out_info->type_info = &op_info; - uint32_t offset_type = xe::load_and_swap(ptr + 0); - uint32_t index = offset_type & 0xFFFF; - for (uint32_t n = 0; n < count - 1; n++, index++) { - uint32_t data = xe::load_and_swap(ptr + 4 + n * 4); - out_info->actions.emplace_back( - PacketAction::RegisterWrite(index, data)); - } - return true; - } - case PM4_IM_LOAD: { - // load sequencer instruction memory (pointer-based) - static const PacketTypeInfo op_info = {PacketCategory::kGeneric, - "PM4_IM_LOAD"}; - out_info->type_info = &op_info; - uint32_t addr_type = xe::load_and_swap(ptr + 0); - auto shader_type = static_cast(addr_type & 0x3); - uint32_t addr = addr_type & ~0x3; - uint32_t start_size = xe::load_and_swap(ptr + 4); - uint32_t start = start_size >> 16; - uint32_t size_dwords = start_size & 0xFFFF; // dwords - assert_true(start == 0); - break; - } - case PM4_IM_LOAD_IMMEDIATE: { - // load sequencer instruction memory (code embedded in packet) - static const PacketTypeInfo op_info = {PacketCategory::kGeneric, - "PM4_IM_LOAD_IMMEDIATE"}; - out_info->type_info = &op_info; - uint32_t dword0 = xe::load_and_swap(ptr + 0); - uint32_t dword1 = xe::load_and_swap(ptr + 4); - auto shader_type = static_cast(dword0); - uint32_t start_size = dword1; - uint32_t start = start_size >> 16; - uint32_t size_dwords = start_size & 0xFFFF; // dwords - assert_true(start == 0); - break; - } - case PM4_INVALIDATE_STATE: { - // selective invalidation of state pointers - static const PacketTypeInfo op_info = {PacketCategory::kGeneric, - "PM4_INVALIDATE_STATE"}; - out_info->type_info = &op_info; - uint32_t mask = xe::load_and_swap(ptr + 0); - break; - } - case PM4_SET_BIN_MASK_LO: { - static const PacketTypeInfo op_info = {PacketCategory::kGeneric, - "PM4_SET_BIN_MASK_LO"}; - out_info->type_info = &op_info; - uint32_t value = xe::load_and_swap(ptr); - // bin_mask_ = (bin_mask_ & 0xFFFFFFFF00000000ull) | value; - out_info->actions.emplace_back(PacketAction::SetBinMask(value)); - break; - } - case PM4_SET_BIN_MASK_HI: { - static const PacketTypeInfo op_info = {PacketCategory::kGeneric, - "PM4_SET_BIN_MASK_HI"}; - out_info->type_info = &op_info; - uint32_t value = xe::load_and_swap(ptr); - // bin_mask_ = - // (bin_mask_ & 0xFFFFFFFFull) | (static_cast(value) << 32); - break; - } - case PM4_SET_BIN_SELECT_LO: { - static const PacketTypeInfo op_info = {PacketCategory::kGeneric, - "PM4_SET_BIN_SELECT_LO"}; - out_info->type_info = &op_info; - uint32_t value = xe::load_and_swap(ptr); - // bin_select_ = (bin_select_ & 0xFFFFFFFF00000000ull) | value; - out_info->actions.emplace_back(PacketAction::SetBinSelect(value)); - break; - } - case PM4_SET_BIN_SELECT_HI: { - static const PacketTypeInfo op_info = {PacketCategory::kGeneric, - "PM4_SET_BIN_SELECT_HI"}; - out_info->type_info = &op_info; - uint32_t value = xe::load_and_swap(ptr); - // bin_select_ = - // (bin_select_ & 0xFFFFFFFFull) | (static_cast(value) << 32); - break; - } - - // Ignored packets - useful if breaking on the default handler below. - case 0x50: { // 0xC0015000 usually 2 words, 0xFFFFFFFF / 0x00000000 - static const PacketTypeInfo op_info = {PacketCategory::kGeneric, - "PM4_TYPE3_0x50"}; - out_info->type_info = &op_info; - break; - } - case 0x51: { // 0xC0015100 usually 2 words, 0xFFFFFFFF / 0xFFFFFFFF - static const PacketTypeInfo op_info = {PacketCategory::kGeneric, - "PM4_TYPE3_0x51"}; - out_info->type_info = &op_info; - break; - } - default: { - result = false; - break; - } - } - - return result; -} -bool DisasmPacket(const uint8_t* base_ptr, PacketInfo* out_info) { - const uint32_t packet = xe::load_and_swap(base_ptr); - const uint32_t packet_type = packet >> 30; - switch (packet_type) { - case 0x00: - return DisasmPacketType0(base_ptr, packet, out_info); - case 0x01: - return DisasmPacketType1(base_ptr, packet, out_info); - case 0x02: - return DisasmPacketType2(base_ptr, packet, out_info); - case 0x03: - return DisasmPacketType3(base_ptr, packet, out_info); - default: - assert_unhandled_case(packet_type); - return false; - } -} - -PacketCategory GetPacketCategory(const uint8_t* base_ptr) { - const uint32_t packet = xe::load_and_swap(base_ptr); - const uint32_t packet_type = packet >> 30; - switch (packet_type) { - case 0x00: - case 0x01: - case 0x02: { - return PacketCategory::kGeneric; - } - case 0x03: { - uint32_t opcode = (packet >> 8) & 0x7F; - switch (opcode) { - case PM4_DRAW_INDX: - case PM4_DRAW_INDX_2: - return PacketCategory::kDraw; - case PM4_XE_SWAP: - return PacketCategory::kSwap; - default: - return PacketCategory::kGeneric; - } - } - default: { - assert_unhandled_case(packet_type); - return PacketCategory::kGeneric; - } - } -} - -// TODO(benvanik): move to tracing.h/cc - -class TraceReader { - public: - struct Frame { - struct Command { - enum class Type { - kDraw, - kSwap, - }; - const uint8_t* head_ptr; - const uint8_t* start_ptr; - const uint8_t* end_ptr; - Type type; - union { - struct { - // - } draw; - struct { - // - } swap; - }; - }; - - const uint8_t* start_ptr; - const uint8_t* end_ptr; - int command_count; - std::vector commands; - }; - - TraceReader() : trace_data_(nullptr), trace_size_(0) {} - ~TraceReader() = default; - - const Frame* frame(int n) const { return &frames_[n]; } - int frame_count() const { return int(frames_.size()); } - - bool Open(const std::wstring& path) { - Close(); - - mmap_ = MappedMemory::Open(path, MappedMemory::Mode::kRead); - if (!mmap_) { - return false; - } - - trace_data_ = reinterpret_cast(mmap_->data()); - trace_size_ = mmap_->size(); - - ParseTrace(); - - return true; - } - - void Close() { - mmap_.reset(); - trace_data_ = nullptr; - trace_size_ = 0; - } - - // void Foo() { - // auto trace_ptr = trace_data; - // while (trace_ptr < trace_data + trace_size) { - // auto cmd_type = *reinterpret_cast(trace_ptr); - // switch (cmd_type) { - // case TraceCommandType::kPrimaryBufferStart: - // break; - // case TraceCommandType::kPrimaryBufferEnd: - // break; - // case TraceCommandType::kIndirectBufferStart: - // break; - // case TraceCommandType::kIndirectBufferEnd: - // break; - // case TraceCommandType::kPacketStart: - // break; - // case TraceCommandType::kPacketEnd: - // break; - // case TraceCommandType::kMemoryRead: - // break; - // case TraceCommandType::kMemoryWrite: - // break; - // case TraceCommandType::kEvent: - // break; - // } - // /*trace_ptr = graphics_system->PlayTrace( - // trace_ptr, trace_size - (trace_ptr - trace_data), - // GraphicsSystem::TracePlaybackMode::kBreakOnSwap);*/ - // } - //} - - protected: - void ParseTrace() { - auto trace_ptr = trace_data_; - Frame current_frame = { - trace_ptr, nullptr, 0, - }; - const PacketStartCommand* packet_start = nullptr; - const uint8_t* packet_start_ptr = nullptr; - const uint8_t* last_ptr = trace_ptr; - bool pending_break = false; - while (trace_ptr < trace_data_ + trace_size_) { - ++current_frame.command_count; - auto type = static_cast(xe::load(trace_ptr)); - switch (type) { - case TraceCommandType::kPrimaryBufferStart: { - auto cmd = - reinterpret_cast(trace_ptr); - trace_ptr += sizeof(*cmd) + cmd->count * 4; - break; - } - case TraceCommandType::kPrimaryBufferEnd: { - auto cmd = - reinterpret_cast(trace_ptr); - trace_ptr += sizeof(*cmd); - break; - } - case TraceCommandType::kIndirectBufferStart: { - auto cmd = - reinterpret_cast(trace_ptr); - trace_ptr += sizeof(*cmd) + cmd->count * 4; - break; - } - case TraceCommandType::kIndirectBufferEnd: { - auto cmd = - reinterpret_cast(trace_ptr); - trace_ptr += sizeof(*cmd); - break; - } - case TraceCommandType::kPacketStart: { - auto cmd = reinterpret_cast(trace_ptr); - packet_start_ptr = trace_ptr; - packet_start = cmd; - trace_ptr += sizeof(*cmd) + cmd->count * 4; - break; - } - case TraceCommandType::kPacketEnd: { - auto cmd = reinterpret_cast(trace_ptr); - trace_ptr += sizeof(*cmd); - if (!packet_start_ptr) { - continue; - } - auto packet_category = - GetPacketCategory(packet_start_ptr + sizeof(*packet_start)); - switch (packet_category) { - case PacketCategory::kDraw: { - Frame::Command command; - command.type = Frame::Command::Type::kDraw; - command.head_ptr = packet_start_ptr; - command.start_ptr = last_ptr; - command.end_ptr = trace_ptr; - current_frame.commands.push_back(std::move(command)); - last_ptr = trace_ptr; - break; - } - case PacketCategory::kSwap: { - // - break; - } - } - if (pending_break) { - current_frame.end_ptr = trace_ptr; - frames_.push_back(std::move(current_frame)); - current_frame.start_ptr = trace_ptr; - current_frame.end_ptr = nullptr; - current_frame.command_count = 0; - pending_break = false; - } - break; - } - case TraceCommandType::kMemoryRead: { - auto cmd = reinterpret_cast(trace_ptr); - trace_ptr += sizeof(*cmd) + cmd->length; - break; - } - case TraceCommandType::kMemoryWrite: { - auto cmd = reinterpret_cast(trace_ptr); - trace_ptr += sizeof(*cmd) + cmd->length; - break; - } - case TraceCommandType::kEvent: { - auto cmd = reinterpret_cast(trace_ptr); - trace_ptr += sizeof(*cmd); - switch (cmd->event_type) { - case EventType::kSwap: { - pending_break = true; - break; - } - } - break; - } - default: - // Broken trace file? - assert_unhandled_case(type); - break; - } - } - if (pending_break || current_frame.command_count) { - current_frame.end_ptr = trace_ptr; - frames_.push_back(std::move(current_frame)); - } - } - - std::unique_ptr mmap_; - const uint8_t* trace_data_; - size_t trace_size_; - std::vector frames_; -}; - -class TracePlayer : public TraceReader { - public: - TracePlayer(xe::ui::Loop* loop, GraphicsSystem* graphics_system) - : loop_(loop), - graphics_system_(graphics_system), - current_frame_index_(0), - current_command_index_(-1) { - // Need to allocate all of physical memory so that we can write to it - // during playback. - graphics_system_->memory() - ->LookupHeapByType(true, 4096) - ->AllocFixed(0, 0x1FFFFFFF, 4096, - kMemoryAllocationReserve | kMemoryAllocationCommit, - kMemoryProtectRead | kMemoryProtectWrite); - } - ~TracePlayer() = default; - - GraphicsSystem* graphics_system() const { return graphics_system_; } - int current_frame_index() const { return current_frame_index_; } - - const Frame* current_frame() const { - if (current_frame_index_ > frame_count()) { - return nullptr; - } - return frame(current_frame_index_); - } - - void SeekFrame(int target_frame) { - if (current_frame_index_ == target_frame) { - return; - } - current_frame_index_ = target_frame; - auto frame = current_frame(); - current_command_index_ = int(frame->commands.size()) - 1; - - assert_true(frame->start_ptr <= frame->end_ptr); - graphics_system_->PlayTrace( - frame->start_ptr, frame->end_ptr - frame->start_ptr, - GraphicsSystem::TracePlaybackMode::kBreakOnSwap); - } - - int current_command_index() const { return current_command_index_; } - - void SeekCommand(int target_command) { - if (current_command_index_ == target_command) { - return; - } - int previous_command_index = current_command_index_; - current_command_index_ = target_command; - if (current_command_index_ == -1) { - return; - } - auto frame = current_frame(); - const auto& command = frame->commands[target_command]; - assert_true(frame->start_ptr <= command.end_ptr); - if (target_command && previous_command_index == target_command - 1) { - // Seek forward. - const auto& previous_command = frame->commands[target_command - 1]; - graphics_system_->PlayTrace( - previous_command.end_ptr, command.end_ptr - previous_command.end_ptr, - GraphicsSystem::TracePlaybackMode::kBreakOnSwap); - } else { - // Full playback from frame start. - graphics_system_->PlayTrace( - frame->start_ptr, command.end_ptr - frame->start_ptr, - GraphicsSystem::TracePlaybackMode::kBreakOnSwap); - } - } - - private: - xe::ui::Loop* loop_; - GraphicsSystem* graphics_system_; - int current_frame_index_; - int current_command_index_; -}; void DrawControllerUI(xe::ui::Window* window, TracePlayer& player, Memory* memory) { @@ -1126,7 +329,7 @@ void DrawFailedTextureInfo(const Shader::SamplerDesc& desc, } void DrawTextureInfo(TracePlayer& player, const Shader::SamplerDesc& desc) { auto gs = static_cast(player.graphics_system()); - auto cp = gs->command_processor(); + auto cp = static_cast(gs->command_processor()); auto& regs = *gs->register_file(); int r = XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + desc.fetch_slot * 6; @@ -1394,7 +597,7 @@ static const char* kEndiannessNames[] = { void DrawStateUI(xe::ui::Window* window, TracePlayer& player, Memory* memory) { auto gs = static_cast(player.graphics_system()); - auto cp = gs->command_processor(); + auto cp = static_cast(gs->command_processor()); auto& regs = *gs->register_file(); ImGui::SetNextWindowPos(ImVec2(float(window->width()) - 500 - 5, 30), @@ -2097,9 +1300,10 @@ void DrawPacketDisassemblerUI(xe::ui::Window* window, TracePlayer& player, trace_ptr += sizeof(*cmd); if (pending_packet) { PacketInfo packet_info = {0}; - if (DisasmPacket(reinterpret_cast(pending_packet) + - sizeof(PacketStartCommand), - &packet_info)) { + if (PacketDisassembler::DisasmPacket( + reinterpret_cast(pending_packet) + + sizeof(PacketStartCommand), + &packet_info)) { if (packet_info.predicated) { ImGui::PushStyleColor(ImGuiCol_Text, kColorIgnored); } diff --git a/src/xenia/gpu/gl4/premake5.lua b/src/xenia/gpu/gl4/premake5.lua index f70c1be2b..e1145d266 100644 --- a/src/xenia/gpu/gl4/premake5.lua +++ b/src/xenia/gpu/gl4/premake5.lua @@ -66,7 +66,7 @@ project("xenia-gpu-gl4-trace-viewer") project_root.."/build_tools/third_party/gflags/src", }) files({ - "trace_viewer_main.cc", + "gl4_trace_viewer_main.cc", "../../base/main_"..platform_suffix..".cc", }) diff --git a/src/xenia/gpu/graphics_system.cc b/src/xenia/gpu/graphics_system.cc index 97ac0c6ee..0d7ddb989 100644 --- a/src/xenia/gpu/graphics_system.cc +++ b/src/xenia/gpu/graphics_system.cc @@ -9,27 +9,30 @@ #include "xenia/gpu/graphics_system.h" +#include "xenia/base/clock.h" #include "xenia/base/logging.h" #include "xenia/base/math.h" -#include "xenia/cpu/processor.h" +#include "xenia/base/profiling.h" +#include "xenia/base/threading.h" +#include "xenia/gpu/command_processor.h" #include "xenia/gpu/gpu_flags.h" -#include "xenia/kernel/xthread.h" +#include "xenia/ui/loop.h" namespace xe { namespace gpu { namespace gl4 { -std::unique_ptr Create(Emulator* emulator); +std::unique_ptr Create(); } // namespace gl4 -std::unique_ptr GraphicsSystem::Create(Emulator* emulator) { +std::unique_ptr GraphicsSystem::Create() { if (FLAGS_gpu.compare("gl4") == 0) { - return xe::gpu::gl4::Create(emulator); + return xe::gpu::gl4::Create(); } else { // Create best available. std::unique_ptr best; - best = xe::gpu::gl4::Create(emulator); + best = xe::gpu::gl4::Create(); if (best) { return best; } @@ -39,21 +42,157 @@ std::unique_ptr GraphicsSystem::Create(Emulator* emulator) { } } -GraphicsSystem::GraphicsSystem(Emulator* emulator) : emulator_(emulator) {} +GraphicsSystem::GraphicsSystem() : vsync_worker_running_(false) {} GraphicsSystem::~GraphicsSystem() = default; -X_STATUS GraphicsSystem::Setup(cpu::Processor* processor, ui::Loop* target_loop, +X_STATUS GraphicsSystem::Setup(cpu::Processor* processor, + kernel::KernelState* kernel_state, ui::Window* target_window) { - processor_ = processor; memory_ = processor->memory(); - target_loop_ = target_loop; + processor_ = processor; + kernel_state_ = kernel_state; target_window_ = target_window; + // Initialize rendering context. + // This must happen on the UI thread. + std::unique_ptr processor_context; + target_window_->loop()->PostSynchronous([&]() { + // Setup the GL context the command processor will do all its drawing in. + // It's shared with the display context so that we can resolve framebuffers + // from it. + processor_context = target_window->context()->CreateShared(); + processor_context->ClearCurrent(); + }); + if (!processor_context) { + xe::FatalError( + "Unable to initialize GL context. Xenia requires OpenGL 4.5. Ensure " + "you have the latest drivers for your GPU and that it supports OpenGL " + "4.5. See http://xenia.jp/faq/ for more information."); + return X_STATUS_UNSUCCESSFUL; + } + + // Create command processor. This will spin up a thread to process all + // incoming ringbuffer packets. + command_processor_ = CreateCommandProcessor(); + if (!command_processor_->Initialize(std::move(processor_context))) { + XELOGE("Unable to initialize command processor"); + return X_STATUS_UNSUCCESSFUL; + } + command_processor_->set_swap_request_handler( + [this]() { target_window_->Invalidate(); }); + + // Watch for paint requests to do our swap. + target_window->on_painting.AddListener( + [this](xe::ui::UIEvent* e) { Swap(e); }); + + // Let the processor know we want register access callbacks. + memory_->AddVirtualMappedRange( + 0x7FC80000, 0xFFFF0000, 0x0000FFFF, this, + reinterpret_cast(ReadRegisterThunk), + reinterpret_cast(WriteRegisterThunk)); + + // 60hz vsync timer. + vsync_worker_running_ = true; + vsync_worker_thread_ = kernel::object_ref( + new kernel::XHostThread(kernel_state_, 128 * 1024, 0, [this]() { + uint64_t vsync_duration = FLAGS_vsync ? 16 : 1; + uint64_t last_frame_time = Clock::QueryGuestTickCount(); + while (vsync_worker_running_) { + uint64_t current_time = Clock::QueryGuestTickCount(); + uint64_t elapsed = (current_time - last_frame_time) / + (Clock::guest_tick_frequency() / 1000); + if (elapsed >= vsync_duration) { + MarkVblank(); + last_frame_time = current_time; + } + xe::threading::Sleep(std::chrono::milliseconds(1)); + } + return 0; + })); + // As we run vblank interrupts the debugger must be able to suspend us. + vsync_worker_thread_->set_can_debugger_suspend(true); + vsync_worker_thread_->set_name("GraphicsSystem Vsync"); + vsync_worker_thread_->Create(); + + if (FLAGS_trace_gpu_stream) { + BeginTracing(); + } + return X_STATUS_SUCCESS; } -void GraphicsSystem::Shutdown() {} +void GraphicsSystem::Shutdown() { + EndTracing(); + + vsync_worker_running_ = false; + vsync_worker_thread_->Wait(0, 0, 0, nullptr); + vsync_worker_thread_.reset(); + + command_processor_->Shutdown(); + + // TODO(benvanik): remove mapped range. + + command_processor_.reset(); +} + +uint32_t GraphicsSystem::ReadRegisterThunk(void* ppc_context, + GraphicsSystem* gs, uint32_t addr) { + return gs->ReadRegister(addr); +} + +void GraphicsSystem::WriteRegisterThunk(void* ppc_context, GraphicsSystem* gs, + uint32_t addr, uint32_t value) { + gs->WriteRegister(addr, value); +} + +uint32_t GraphicsSystem::ReadRegister(uint32_t addr) { + uint32_t r = addr & 0xFFFF; + + switch (r) { + case 0x3C00: // ? + return 0x08100748; + case 0x3C04: // ? + return 0x0000200E; + case 0x6530: // Scanline? + return 0x000002D0; + case 0x6544: // ? vblank pending? + return 1; + case 0x6584: // Screen res - 1280x720 + return 0x050002D0; + } + + assert_true(r < RegisterFile::kRegisterCount); + return register_file_.values[r].u32; +} + +void GraphicsSystem::WriteRegister(uint32_t addr, uint32_t value) { + uint32_t r = addr & 0xFFFF; + + switch (r) { + case 0x0714: // CP_RB_WPTR + command_processor_->UpdateWritePointer(value); + break; + case 0x6110: // ? swap related? + XELOGW("Unimplemented GPU register %.4X write: %.8X", r, value); + return; + default: + XELOGW("Unknown GPU register %.4X write: %.8X", r, value); + break; + } + + assert_true(r < RegisterFile::kRegisterCount); + register_file_.values[r].u32 = value; +} + +void GraphicsSystem::InitializeRingBuffer(uint32_t ptr, uint32_t page_count) { + command_processor_->InitializeRingBuffer(ptr, page_count); +} + +void GraphicsSystem::EnableReadPointerWriteBack(uint32_t ptr, + uint32_t block_size) { + command_processor_->EnableReadPointerWriteBack(ptr, block_size); +} void GraphicsSystem::SetInterruptCallback(uint32_t callback, uint32_t user_data) { @@ -84,5 +223,32 @@ void GraphicsSystem::DispatchInterruptCallback(uint32_t source, uint32_t cpu) { args, xe::countof(args)); } +void GraphicsSystem::MarkVblank() { + SCOPE_profile_cpu_f("gpu"); + + // Increment vblank counter (so the game sees us making progress). + command_processor_->increment_counter(); + + // TODO(benvanik): we shouldn't need to do the dispatch here, but there's + // something wrong and the CP will block waiting for code that + // needs to be run in the interrupt. + DispatchInterruptCallback(0, 2); +} + +void GraphicsSystem::ClearCaches() { + command_processor_->CallInThread( + [&]() { command_processor_->ClearCaches(); }); +} + +void GraphicsSystem::RequestFrameTrace() { + command_processor_->RequestFrameTrace(xe::to_wstring(FLAGS_trace_gpu_prefix)); +} + +void GraphicsSystem::BeginTracing() { + command_processor_->BeginTracing(xe::to_wstring(FLAGS_trace_gpu_prefix)); +} + +void GraphicsSystem::EndTracing() { command_processor_->EndTracing(); } + } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/graphics_system.h b/src/xenia/gpu/graphics_system.h index 509e8c86e..9ff96aa10 100644 --- a/src/xenia/gpu/graphics_system.h +++ b/src/xenia/gpu/graphics_system.h @@ -15,8 +15,9 @@ #include #include "xenia/cpu/processor.h" +#include "xenia/gpu/register_file.h" +#include "xenia/kernel/xthread.h" #include "xenia/memory.h" -#include "xenia/ui/loop.h" #include "xenia/ui/window.h" #include "xenia/xbox.h" @@ -27,51 +28,70 @@ class Emulator; namespace xe { namespace gpu { +class CommandProcessor; + class GraphicsSystem { public: virtual ~GraphicsSystem(); - static std::unique_ptr Create(Emulator* emulator); + static std::unique_ptr Create(); virtual std::unique_ptr CreateContext( ui::Window* target_window) = 0; - Emulator* emulator() const { return emulator_; } Memory* memory() const { return memory_; } cpu::Processor* processor() const { return processor_; } + kernel::KernelState* kernel_state() const { return kernel_state_; } - virtual X_STATUS Setup(cpu::Processor* processor, ui::Loop* target_loop, + virtual X_STATUS Setup(cpu::Processor* processor, + kernel::KernelState* kernel_state, ui::Window* target_window); virtual void Shutdown(); - void SetInterruptCallback(uint32_t callback, uint32_t user_data); - virtual void InitializeRingBuffer(uint32_t ptr, uint32_t page_count) = 0; - virtual void EnableReadPointerWriteBack(uint32_t ptr, - uint32_t block_size) = 0; + RegisterFile* register_file() { return ®ister_file_; } + CommandProcessor* command_processor() const { + return command_processor_.get(); + } + void InitializeRingBuffer(uint32_t ptr, uint32_t page_count); + void EnableReadPointerWriteBack(uint32_t ptr, uint32_t block_size); + + void SetInterruptCallback(uint32_t callback, uint32_t user_data); void DispatchInterruptCallback(uint32_t source, uint32_t cpu); - virtual void RequestFrameTrace() {} - virtual void BeginTracing() {} - virtual void EndTracing() {} - enum class TracePlaybackMode { - kUntilEnd, - kBreakOnSwap, - }; - virtual void PlayTrace(const uint8_t* trace_data, size_t trace_size, - TracePlaybackMode playback_mode) {} - virtual void ClearCaches() {} + virtual void ClearCaches(); + + void RequestFrameTrace(); + void BeginTracing(); + void EndTracing(); protected: - explicit GraphicsSystem(Emulator* emulator); + GraphicsSystem(); + + virtual std::unique_ptr CreateCommandProcessor() = 0; + + static uint32_t ReadRegisterThunk(void* ppc_context, GraphicsSystem* gs, + uint32_t addr); + static void WriteRegisterThunk(void* ppc_context, GraphicsSystem* gs, + uint32_t addr, uint32_t value); + uint32_t ReadRegister(uint32_t addr); + void WriteRegister(uint32_t addr, uint32_t value); + + void MarkVblank(); + virtual void Swap(xe::ui::UIEvent* e) = 0; - Emulator* emulator_ = nullptr; Memory* memory_ = nullptr; cpu::Processor* processor_ = nullptr; - ui::Loop* target_loop_ = nullptr; + kernel::KernelState* kernel_state_ = nullptr; ui::Window* target_window_ = nullptr; uint32_t interrupt_callback_ = 0; uint32_t interrupt_callback_data_ = 0; + + std::atomic vsync_worker_running_; + kernel::object_ref vsync_worker_thread_; + + RegisterFile register_file_; + std::unique_ptr command_processor_; }; } // namespace gpu diff --git a/src/xenia/gpu/packet_disassembler.cc b/src/xenia/gpu/packet_disassembler.cc new file mode 100644 index 000000000..198323294 --- /dev/null +++ b/src/xenia/gpu/packet_disassembler.cc @@ -0,0 +1,498 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2015 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/gpu/packet_disassembler.h" + +#include "xenia/gpu/xenos.h" + +namespace xe { +namespace gpu { + +using namespace xe::gpu::xenos; + +PacketCategory PacketDisassembler::GetPacketCategory(const uint8_t* base_ptr) { + const uint32_t packet = xe::load_and_swap(base_ptr); + const uint32_t packet_type = packet >> 30; + switch (packet_type) { + case 0x00: + case 0x01: + case 0x02: { + return PacketCategory::kGeneric; + } + case 0x03: { + uint32_t opcode = (packet >> 8) & 0x7F; + switch (opcode) { + case PM4_DRAW_INDX: + case PM4_DRAW_INDX_2: + return PacketCategory::kDraw; + case PM4_XE_SWAP: + return PacketCategory::kSwap; + default: + return PacketCategory::kGeneric; + } + } + default: { + assert_unhandled_case(packet_type); + return PacketCategory::kGeneric; + } + } +} + +bool PacketDisassembler::DisasmPacketType0(const uint8_t* base_ptr, + uint32_t packet, + PacketInfo* out_info) { + static const PacketTypeInfo type_0_info = {PacketCategory::kGeneric, + "PM4_TYPE0"}; + out_info->type_info = &type_0_info; + + uint32_t count = ((packet >> 16) & 0x3FFF) + 1; + out_info->count = 1 + count; + auto ptr = base_ptr + 4; + + uint32_t base_index = (packet & 0x7FFF); + uint32_t write_one_reg = (packet >> 15) & 0x1; + for (uint32_t m = 0; m < count; m++) { + uint32_t reg_data = xe::load_and_swap(ptr); + uint32_t target_index = write_one_reg ? base_index : base_index + m; + out_info->actions.emplace_back( + PacketAction::RegisterWrite(target_index, reg_data)); + ptr += 4; + } + + return true; +} + +bool PacketDisassembler::DisasmPacketType1(const uint8_t* base_ptr, + uint32_t packet, + PacketInfo* out_info) { + static const PacketTypeInfo type_1_info = {PacketCategory::kGeneric, + "PM4_TYPE1"}; + out_info->type_info = &type_1_info; + + out_info->count = 1 + 2; + auto ptr = base_ptr + 4; + + uint32_t reg_index_1 = packet & 0x7FF; + uint32_t reg_index_2 = (packet >> 11) & 0x7FF; + uint32_t reg_data_1 = xe::load_and_swap(ptr); + uint32_t reg_data_2 = xe::load_and_swap(ptr + 4); + out_info->actions.emplace_back( + PacketAction::RegisterWrite(reg_index_1, reg_data_1)); + out_info->actions.emplace_back( + PacketAction::RegisterWrite(reg_index_2, reg_data_2)); + + return true; +} + +bool PacketDisassembler::DisasmPacketType2(const uint8_t* base_ptr, + uint32_t packet, + PacketInfo* out_info) { + static const PacketTypeInfo type_2_info = {PacketCategory::kGeneric, + "PM4_TYPE2"}; + out_info->type_info = &type_2_info; + + out_info->count = 1; + + return true; +} + +bool PacketDisassembler::DisasmPacketType3(const uint8_t* base_ptr, + uint32_t packet, + PacketInfo* out_info) { + static const PacketTypeInfo type_3_unknown_info = {PacketCategory::kGeneric, + "PM4_TYPE3_UNKNOWN"}; + out_info->type_info = &type_3_unknown_info; + + uint32_t opcode = (packet >> 8) & 0x7F; + uint32_t count = ((packet >> 16) & 0x3FFF) + 1; + out_info->count = 1 + count; + auto ptr = base_ptr + 4; + + if (packet & 1) { + out_info->predicated = true; + } + + bool result = true; + switch (opcode) { + case PM4_ME_INIT: { + // initialize CP's micro-engine + static const PacketTypeInfo op_info = {PacketCategory::kGeneric, + "PM4_ME_INIT"}; + out_info->type_info = &op_info; + break; + } + case PM4_NOP: { + // skip N 32-bit words to get to the next packet + // No-op, ignore some data. + static const PacketTypeInfo op_info = {PacketCategory::kGeneric, + "PM4_NOP"}; + out_info->type_info = &op_info; + break; + } + case PM4_INTERRUPT: { + // generate interrupt from the command stream + static const PacketTypeInfo op_info = {PacketCategory::kGeneric, + "PM4_INTERRUPT"}; + out_info->type_info = &op_info; + uint32_t cpu_mask = xe::load_and_swap(ptr + 0); + for (int n = 0; n < 6; n++) { + if (cpu_mask & (1 << n)) { + // graphics_system_->DispatchInterruptCallback(1, n); + } + } + break; + } + case PM4_XE_SWAP: { + // Xenia-specific VdSwap hook. + // VdSwap will post this to tell us we need to swap the screen/fire an + // interrupt. + // 63 words here, but only the first has any data. + static const PacketTypeInfo op_info = {PacketCategory::kSwap, + "PM4_XE_SWAP"}; + out_info->type_info = &op_info; + uint32_t frontbuffer_ptr = xe::load_and_swap(ptr + 0); + break; + } + case PM4_INDIRECT_BUFFER: { + // indirect buffer dispatch + static const PacketTypeInfo op_info = {PacketCategory::kGeneric, + "PM4_INDIRECT_BUFFER"}; + out_info->type_info = &op_info; + uint32_t list_ptr = xe::load_and_swap(ptr + 0); + uint32_t list_length = xe::load_and_swap(ptr + 4); + break; + } + case PM4_WAIT_REG_MEM: { + // wait until a register or memory location is a specific value + static const PacketTypeInfo op_info = {PacketCategory::kGeneric, + "PM4_WAIT_REG_MEM"}; + out_info->type_info = &op_info; + uint32_t wait_info = xe::load_and_swap(ptr + 0); + uint32_t poll_reg_addr = xe::load_and_swap(ptr + 4); + uint32_t ref = xe::load_and_swap(ptr + 8); + uint32_t mask = xe::load_and_swap(ptr + 12); + uint32_t wait = xe::load_and_swap(ptr + 16); + break; + } + case PM4_REG_RMW: { + // register read/modify/write + // ? (used during shader upload and edram setup) + static const PacketTypeInfo op_info = {PacketCategory::kGeneric, + "PM4_REG_RMW"}; + out_info->type_info = &op_info; + uint32_t rmw_info = xe::load_and_swap(ptr + 0); + uint32_t and_mask = xe::load_and_swap(ptr + 4); + uint32_t or_mask = xe::load_and_swap(ptr + 8); + break; + } + case PM4_COND_WRITE: { + // conditional write to memory or register + static const PacketTypeInfo op_info = {PacketCategory::kGeneric, + "PM4_COND_WRITE"}; + out_info->type_info = &op_info; + uint32_t wait_info = xe::load_and_swap(ptr + 0); + uint32_t poll_reg_addr = xe::load_and_swap(ptr + 4); + uint32_t ref = xe::load_and_swap(ptr + 8); + uint32_t mask = xe::load_and_swap(ptr + 12); + uint32_t write_reg_addr = xe::load_and_swap(ptr + 16); + uint32_t write_data = xe::load_and_swap(ptr + 20); + break; + } + case PM4_EVENT_WRITE: { + // generate an event that creates a write to memory when completed + static const PacketTypeInfo op_info = {PacketCategory::kGeneric, + "PM4_EVENT_WRITE"}; + out_info->type_info = &op_info; + uint32_t initiator = xe::load_and_swap(ptr + 0); + break; + } + case PM4_EVENT_WRITE_SHD: { + // generate a VS|PS_done event + static const PacketTypeInfo op_info = {PacketCategory::kGeneric, + "PM4_EVENT_WRITE_SHD"}; + out_info->type_info = &op_info; + uint32_t initiator = xe::load_and_swap(ptr + 0); + uint32_t address = xe::load_and_swap(ptr + 4); + uint32_t value = xe::load_and_swap(ptr + 8); + break; + } + case PM4_EVENT_WRITE_EXT: { + // generate a screen extent event + static const PacketTypeInfo op_info = {PacketCategory::kGeneric, + "PM4_EVENT_WRITE_EXT"}; + out_info->type_info = &op_info; + uint32_t unk0 = xe::load_and_swap(ptr + 0); + uint32_t unk1 = xe::load_and_swap(ptr + 4); + break; + } + case PM4_DRAW_INDX: { + // initiate fetch of index buffer and draw + // dword0 = viz query info + static const PacketTypeInfo op_info = {PacketCategory::kDraw, + "PM4_DRAW_INDX"}; + out_info->type_info = &op_info; + uint32_t dword0 = xe::load_and_swap(ptr + 0); + uint32_t dword1 = xe::load_and_swap(ptr + 4); + uint32_t index_count = dword1 >> 16; + auto prim_type = static_cast(dword1 & 0x3F); + uint32_t src_sel = (dword1 >> 6) & 0x3; + if (src_sel == 0x0) { + // Indexed draw. + uint32_t guest_base = xe::load_and_swap(ptr + 8); + uint32_t index_size = xe::load_and_swap(ptr + 12); + auto endianness = static_cast(index_size >> 30); + index_size &= 0x00FFFFFF; + bool index_32bit = (dword1 >> 11) & 0x1; + index_size *= index_32bit ? 4 : 2; + } else if (src_sel == 0x2) { + // Auto draw. + } else { + // Unknown source select. + assert_always(); + } + break; + } + case PM4_DRAW_INDX_2: { + // draw using supplied indices in packet + static const PacketTypeInfo op_info = {PacketCategory::kDraw, + "PM4_DRAW_INDX_2"}; + out_info->type_info = &op_info; + uint32_t dword0 = xe::load_and_swap(ptr + 0); + uint32_t index_count = dword0 >> 16; + auto prim_type = static_cast(dword0 & 0x3F); + uint32_t src_sel = (dword0 >> 6) & 0x3; + assert_true(src_sel == 0x2); // 'SrcSel=AutoIndex' + bool index_32bit = (dword0 >> 11) & 0x1; + uint32_t indices_size = index_count * (index_32bit ? 4 : 2); + auto index_ptr = ptr + 4; + break; + } + case PM4_SET_CONSTANT: { + // load constant into chip and to memory + // PM4_REG(reg) ((0x4 << 16) | (GSL_HAL_SUBBLOCK_OFFSET(reg))) + // reg - 0x2000 + static const PacketTypeInfo op_info = {PacketCategory::kGeneric, + "PM4_SET_CONSTANT"}; + out_info->type_info = &op_info; + uint32_t offset_type = xe::load_and_swap(ptr + 0); + uint32_t index = offset_type & 0x7FF; + uint32_t type = (offset_type >> 16) & 0xFF; + switch (type) { + case 0: // ALU + index += 0x4000; + break; + case 1: // FETCH + index += 0x4800; + break; + case 2: // BOOL + index += 0x4900; + break; + case 3: // LOOP + index += 0x4908; + break; + case 4: // REGISTERS + index += 0x2000; + break; + default: + assert_always(); + result = false; + break; + } + for (uint32_t n = 0; n < count - 1; n++, index++) { + uint32_t data = xe::load_and_swap(ptr + 4 + n * 4); + out_info->actions.emplace_back( + PacketAction::RegisterWrite(index, data)); + } + break; + } + case PM4_SET_CONSTANT2: { + static const PacketTypeInfo op_info = {PacketCategory::kGeneric, + "PM4_SET_CONSTANT2"}; + out_info->type_info = &op_info; + uint32_t offset_type = xe::load_and_swap(ptr + 0); + uint32_t index = offset_type & 0xFFFF; + for (uint32_t n = 0; n < count - 1; n++, index++) { + uint32_t data = xe::load_and_swap(ptr + 4 + n * 4); + out_info->actions.emplace_back( + PacketAction::RegisterWrite(index, data)); + } + return true; + break; + } + case PM4_LOAD_ALU_CONSTANT: { + // load constants from memory + static const PacketTypeInfo op_info = {PacketCategory::kGeneric, + "PM4_LOAD_ALU_CONSTANT"}; + out_info->type_info = &op_info; + uint32_t address = xe::load_and_swap(ptr + 0); + address &= 0x3FFFFFFF; + uint32_t offset_type = xe::load_and_swap(ptr + 4); + uint32_t index = offset_type & 0x7FF; + uint32_t size_dwords = xe::load_and_swap(ptr + 8); + size_dwords &= 0xFFF; + uint32_t type = (offset_type >> 16) & 0xFF; + switch (type) { + case 0: // ALU + index += 0x4000; + break; + case 1: // FETCH + index += 0x4800; + break; + case 2: // BOOL + index += 0x4900; + break; + case 3: // LOOP + index += 0x4908; + break; + case 4: // REGISTERS + index += 0x2000; + break; + default: + assert_always(); + return true; + } + for (uint32_t n = 0; n < size_dwords; n++, index++) { + // Hrm, ? + // xe::load_and_swap(membase_ + GpuToCpu(address + n * 4)); + uint32_t data = 0xDEADBEEF; + out_info->actions.emplace_back( + PacketAction::RegisterWrite(index, data)); + } + break; + } + case PM4_SET_SHADER_CONSTANTS: { + static const PacketTypeInfo op_info = {PacketCategory::kGeneric, + "PM4_SET_SHADER_CONSTANTS"}; + out_info->type_info = &op_info; + uint32_t offset_type = xe::load_and_swap(ptr + 0); + uint32_t index = offset_type & 0xFFFF; + for (uint32_t n = 0; n < count - 1; n++, index++) { + uint32_t data = xe::load_and_swap(ptr + 4 + n * 4); + out_info->actions.emplace_back( + PacketAction::RegisterWrite(index, data)); + } + return true; + } + case PM4_IM_LOAD: { + // load sequencer instruction memory (pointer-based) + static const PacketTypeInfo op_info = {PacketCategory::kGeneric, + "PM4_IM_LOAD"}; + out_info->type_info = &op_info; + uint32_t addr_type = xe::load_and_swap(ptr + 0); + auto shader_type = static_cast(addr_type & 0x3); + uint32_t addr = addr_type & ~0x3; + uint32_t start_size = xe::load_and_swap(ptr + 4); + uint32_t start = start_size >> 16; + uint32_t size_dwords = start_size & 0xFFFF; // dwords + assert_true(start == 0); + break; + } + case PM4_IM_LOAD_IMMEDIATE: { + // load sequencer instruction memory (code embedded in packet) + static const PacketTypeInfo op_info = {PacketCategory::kGeneric, + "PM4_IM_LOAD_IMMEDIATE"}; + out_info->type_info = &op_info; + uint32_t dword0 = xe::load_and_swap(ptr + 0); + uint32_t dword1 = xe::load_and_swap(ptr + 4); + auto shader_type = static_cast(dword0); + uint32_t start_size = dword1; + uint32_t start = start_size >> 16; + uint32_t size_dwords = start_size & 0xFFFF; // dwords + assert_true(start == 0); + break; + } + case PM4_INVALIDATE_STATE: { + // selective invalidation of state pointers + static const PacketTypeInfo op_info = {PacketCategory::kGeneric, + "PM4_INVALIDATE_STATE"}; + out_info->type_info = &op_info; + uint32_t mask = xe::load_and_swap(ptr + 0); + break; + } + case PM4_SET_BIN_MASK_LO: { + static const PacketTypeInfo op_info = {PacketCategory::kGeneric, + "PM4_SET_BIN_MASK_LO"}; + out_info->type_info = &op_info; + uint32_t value = xe::load_and_swap(ptr); + // bin_mask_ = (bin_mask_ & 0xFFFFFFFF00000000ull) | value; + out_info->actions.emplace_back(PacketAction::SetBinMask(value)); + break; + } + case PM4_SET_BIN_MASK_HI: { + static const PacketTypeInfo op_info = {PacketCategory::kGeneric, + "PM4_SET_BIN_MASK_HI"}; + out_info->type_info = &op_info; + uint32_t value = xe::load_and_swap(ptr); + // bin_mask_ = + // (bin_mask_ & 0xFFFFFFFFull) | (static_cast(value) << 32); + break; + } + case PM4_SET_BIN_SELECT_LO: { + static const PacketTypeInfo op_info = {PacketCategory::kGeneric, + "PM4_SET_BIN_SELECT_LO"}; + out_info->type_info = &op_info; + uint32_t value = xe::load_and_swap(ptr); + // bin_select_ = (bin_select_ & 0xFFFFFFFF00000000ull) | value; + out_info->actions.emplace_back(PacketAction::SetBinSelect(value)); + break; + } + case PM4_SET_BIN_SELECT_HI: { + static const PacketTypeInfo op_info = {PacketCategory::kGeneric, + "PM4_SET_BIN_SELECT_HI"}; + out_info->type_info = &op_info; + uint32_t value = xe::load_and_swap(ptr); + // bin_select_ = + // (bin_select_ & 0xFFFFFFFFull) | (static_cast(value) << + // 32); + break; + } + + // Ignored packets - useful if breaking on the default handler below. + case 0x50: { // 0xC0015000 usually 2 words, 0xFFFFFFFF / 0x00000000 + static const PacketTypeInfo op_info = {PacketCategory::kGeneric, + "PM4_TYPE3_0x50"}; + out_info->type_info = &op_info; + break; + } + case 0x51: { // 0xC0015100 usually 2 words, 0xFFFFFFFF / 0xFFFFFFFF + static const PacketTypeInfo op_info = {PacketCategory::kGeneric, + "PM4_TYPE3_0x51"}; + out_info->type_info = &op_info; + break; + } + default: { + result = false; + break; + } + } + + return result; +} + +bool PacketDisassembler::DisasmPacket(const uint8_t* base_ptr, + PacketInfo* out_info) { + const uint32_t packet = xe::load_and_swap(base_ptr); + const uint32_t packet_type = packet >> 30; + switch (packet_type) { + case 0x00: + return DisasmPacketType0(base_ptr, packet, out_info); + case 0x01: + return DisasmPacketType1(base_ptr, packet, out_info); + case 0x02: + return DisasmPacketType2(base_ptr, packet, out_info); + case 0x03: + return DisasmPacketType3(base_ptr, packet, out_info); + default: + assert_unhandled_case(packet_type); + return false; + } +} + +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/packet_disassembler.h b/src/xenia/gpu/packet_disassembler.h new file mode 100644 index 000000000..942a88409 --- /dev/null +++ b/src/xenia/gpu/packet_disassembler.h @@ -0,0 +1,103 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2015 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_GPU_PACKET_DISASSEMBLER_H_ +#define XENIA_GPU_PACKET_DISASSEMBLER_H_ + +#include + +#include "xenia/gpu/register_file.h" +#include "xenia/gpu/trace_protocol.h" +#include "xenia/gpu/trace_reader.h" +#include "xenia/memory.h" + +namespace xe { +namespace gpu { + +enum class PacketCategory { + kGeneric, + kDraw, + kSwap, +}; + +struct PacketTypeInfo { + PacketCategory category; + const char* name; +}; + +struct PacketAction { + enum class Type { + kRegisterWrite, + kSetBinMask, + kSetBinSelect, + }; + Type type; + + union { + struct { + uint32_t index; + RegisterFile::RegisterValue value; + } register_write; + struct { + uint64_t value; + } set_bin_mask; + struct { + uint64_t value; + } set_bin_select; + }; + + static PacketAction RegisterWrite(uint32_t index, uint32_t value) { + PacketAction action; + action.type = Type::kRegisterWrite; + action.register_write.index = index; + action.register_write.value.u32 = value; + return action; + } + + static PacketAction SetBinMask(uint64_t value) { + PacketAction action; + action.type = Type::kSetBinMask; + action.set_bin_mask.value = value; + return action; + } + + static PacketAction SetBinSelect(uint64_t value) { + PacketAction action; + action.type = Type::kSetBinSelect; + action.set_bin_select.value = value; + return action; + } +}; + +struct PacketInfo { + const PacketTypeInfo* type_info; + bool predicated; + uint32_t count; + std::vector actions; +}; + +class PacketDisassembler { + public: + static PacketCategory GetPacketCategory(const uint8_t* base_ptr); + + static bool DisasmPacketType0(const uint8_t* base_ptr, uint32_t packet, + PacketInfo* out_info); + static bool DisasmPacketType1(const uint8_t* base_ptr, uint32_t packet, + PacketInfo* out_info); + static bool DisasmPacketType2(const uint8_t* base_ptr, uint32_t packet, + PacketInfo* out_info); + static bool DisasmPacketType3(const uint8_t* base_ptr, uint32_t packet, + PacketInfo* out_info); + static bool DisasmPacket(const uint8_t* base_ptr, PacketInfo* out_info); +}; + +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_PACKET_DISASSEMBLER_H_ diff --git a/src/xenia/gpu/trace_player.cc b/src/xenia/gpu/trace_player.cc new file mode 100644 index 000000000..49f91d82d --- /dev/null +++ b/src/xenia/gpu/trace_player.cc @@ -0,0 +1,186 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2015 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/gpu/trace_player.h" + +#include "xenia/gpu/command_processor.h" +#include "xenia/gpu/graphics_system.h" +#include "xenia/memory.h" + +namespace xe { +namespace gpu { + +TracePlayer::TracePlayer(xe::ui::Loop* loop, GraphicsSystem* graphics_system) + : loop_(loop), + graphics_system_(graphics_system), + current_frame_index_(0), + current_command_index_(-1) { + // Need to allocate all of physical memory so that we can write to it + // during playback. + graphics_system_->memory() + ->LookupHeapByType(true, 4096) + ->AllocFixed(0, 0x1FFFFFFF, 4096, + kMemoryAllocationReserve | kMemoryAllocationCommit, + kMemoryProtectRead | kMemoryProtectWrite); +} + +TracePlayer::~TracePlayer() = default; + +const TraceReader::Frame* TracePlayer::current_frame() const { + if (current_frame_index_ > frame_count()) { + return nullptr; + } + return frame(current_frame_index_); +} + +void TracePlayer::SeekFrame(int target_frame) { + if (current_frame_index_ == target_frame) { + return; + } + current_frame_index_ = target_frame; + auto frame = current_frame(); + current_command_index_ = int(frame->commands.size()) - 1; + + assert_true(frame->start_ptr <= frame->end_ptr); + PlayTrace(frame->start_ptr, frame->end_ptr - frame->start_ptr, + TracePlaybackMode::kBreakOnSwap); +} + +void TracePlayer::SeekCommand(int target_command) { + if (current_command_index_ == target_command) { + return; + } + int previous_command_index = current_command_index_; + current_command_index_ = target_command; + if (current_command_index_ == -1) { + return; + } + auto frame = current_frame(); + const auto& command = frame->commands[target_command]; + assert_true(frame->start_ptr <= command.end_ptr); + if (target_command && previous_command_index == target_command - 1) { + // Seek forward. + const auto& previous_command = frame->commands[target_command - 1]; + PlayTrace(previous_command.end_ptr, + command.end_ptr - previous_command.end_ptr, + TracePlaybackMode::kBreakOnSwap); + } else { + // Full playback from frame start. + PlayTrace(frame->start_ptr, command.end_ptr - frame->start_ptr, + TracePlaybackMode::kBreakOnSwap); + } +} + +void TracePlayer::PlayTrace(const uint8_t* trace_data, size_t trace_size, + TracePlaybackMode playback_mode) { + graphics_system_->command_processor()->CallInThread( + [this, trace_data, trace_size, playback_mode]() { + PlayTraceOnThread(trace_data, trace_size, playback_mode); + }); +} + +void TracePlayer::PlayTraceOnThread(const uint8_t* trace_data, + size_t trace_size, + TracePlaybackMode playback_mode) { + auto memory = graphics_system_->memory(); + auto command_processor = graphics_system_->command_processor(); + + command_processor->set_swap_mode(SwapMode::kIgnored); + + auto trace_ptr = trace_data; + bool pending_break = false; + const PacketStartCommand* pending_packet = nullptr; + while (trace_ptr < trace_data + trace_size) { + auto type = static_cast(xe::load(trace_ptr)); + switch (type) { + case TraceCommandType::kPrimaryBufferStart: { + auto cmd = + reinterpret_cast(trace_ptr); + // + trace_ptr += sizeof(*cmd) + cmd->count * 4; + break; + } + case TraceCommandType::kPrimaryBufferEnd: { + auto cmd = reinterpret_cast(trace_ptr); + // + trace_ptr += sizeof(*cmd); + break; + } + case TraceCommandType::kIndirectBufferStart: { + auto cmd = + reinterpret_cast(trace_ptr); + // + trace_ptr += sizeof(*cmd) + cmd->count * 4; + break; + } + case TraceCommandType::kIndirectBufferEnd: { + auto cmd = reinterpret_cast(trace_ptr); + // + trace_ptr += sizeof(*cmd); + break; + } + case TraceCommandType::kPacketStart: { + auto cmd = reinterpret_cast(trace_ptr); + trace_ptr += sizeof(*cmd); + std::memcpy(memory->TranslatePhysical(cmd->base_ptr), trace_ptr, + cmd->count * 4); + trace_ptr += cmd->count * 4; + pending_packet = cmd; + break; + } + case TraceCommandType::kPacketEnd: { + auto cmd = reinterpret_cast(trace_ptr); + trace_ptr += sizeof(*cmd); + if (pending_packet) { + command_processor->ExecutePacket(pending_packet->base_ptr, + pending_packet->count); + pending_packet = nullptr; + } + if (pending_break) { + return; + } + break; + } + case TraceCommandType::kMemoryRead: { + auto cmd = reinterpret_cast(trace_ptr); + trace_ptr += sizeof(*cmd); + std::memcpy(memory->TranslatePhysical(cmd->base_ptr), trace_ptr, + cmd->length); + trace_ptr += cmd->length; + break; + } + case TraceCommandType::kMemoryWrite: { + auto cmd = reinterpret_cast(trace_ptr); + trace_ptr += sizeof(*cmd); + // ? + trace_ptr += cmd->length; + break; + } + case TraceCommandType::kEvent: { + auto cmd = reinterpret_cast(trace_ptr); + trace_ptr += sizeof(*cmd); + switch (cmd->event_type) { + case EventType::kSwap: { + if (playback_mode == TracePlaybackMode::kBreakOnSwap) { + pending_break = true; + } + break; + } + } + break; + } + } + } + + command_processor->set_swap_mode(SwapMode::kNormal); + command_processor->IssueSwap(0, 1280, 720); +} + +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/trace_player.h b/src/xenia/gpu/trace_player.h new file mode 100644 index 000000000..6e78ac3a6 --- /dev/null +++ b/src/xenia/gpu/trace_player.h @@ -0,0 +1,57 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2015 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_GPU_TRACE_PLAYER_H_ +#define XENIA_GPU_TRACE_PLAYER_H_ + +#include + +#include "xenia/gpu/trace_protocol.h" +#include "xenia/gpu/trace_reader.h" +#include "xenia/ui/loop.h" + +namespace xe { +namespace gpu { + +class GraphicsSystem; + +enum class TracePlaybackMode { + kUntilEnd, + kBreakOnSwap, +}; + +class TracePlayer : public TraceReader { + public: + TracePlayer(xe::ui::Loop* loop, GraphicsSystem* graphics_system); + ~TracePlayer() override; + + GraphicsSystem* graphics_system() const { return graphics_system_; } + int current_frame_index() const { return current_frame_index_; } + int current_command_index() const { return current_command_index_; } + const Frame* current_frame() const; + + void SeekFrame(int target_frame); + void SeekCommand(int target_command); + + private: + void PlayTrace(const uint8_t* trace_data, size_t trace_size, + TracePlaybackMode playback_mode); + void PlayTraceOnThread(const uint8_t* trace_data, size_t trace_size, + TracePlaybackMode playback_mode); + + xe::ui::Loop* loop_; + GraphicsSystem* graphics_system_; + int current_frame_index_; + int current_command_index_; +}; + +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_TRACE_PLAYER_H_ diff --git a/src/xenia/gpu/trace_protocol.h b/src/xenia/gpu/trace_protocol.h new file mode 100644 index 000000000..1779b69f8 --- /dev/null +++ b/src/xenia/gpu/trace_protocol.h @@ -0,0 +1,84 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2015 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_GPU_TRACE_PROTOCOL_H_ +#define XENIA_GPU_TRACE_PROTOCOL_H_ + +#include + +namespace xe { +namespace gpu { + +enum class TraceCommandType : uint32_t { + kPrimaryBufferStart, + kPrimaryBufferEnd, + kIndirectBufferStart, + kIndirectBufferEnd, + kPacketStart, + kPacketEnd, + kMemoryRead, + kMemoryWrite, + kEvent, +}; + +struct PrimaryBufferStartCommand { + TraceCommandType type; + uint32_t base_ptr; + uint32_t count; +}; + +struct PrimaryBufferEndCommand { + TraceCommandType type; +}; + +struct IndirectBufferStartCommand { + TraceCommandType type; + uint32_t base_ptr; + uint32_t count; +}; + +struct IndirectBufferEndCommand { + TraceCommandType type; +}; + +struct PacketStartCommand { + TraceCommandType type; + uint32_t base_ptr; + uint32_t count; +}; + +struct PacketEndCommand { + TraceCommandType type; +}; + +struct MemoryReadCommand { + TraceCommandType type; + uint32_t base_ptr; + uint32_t length; +}; + +struct MemoryWriteCommand { + TraceCommandType type; + uint32_t base_ptr; + uint32_t length; +}; + +enum class EventType { + kSwap, +}; + +struct EventCommand { + TraceCommandType type; + EventType event_type; +}; + +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_TRACE_PROTOCOL_H_ diff --git a/src/xenia/gpu/trace_reader.cc b/src/xenia/gpu/trace_reader.cc new file mode 100644 index 000000000..8ed29e4ce --- /dev/null +++ b/src/xenia/gpu/trace_reader.cc @@ -0,0 +1,152 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2015 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/gpu/trace_reader.h" + +#include "xenia/base/mapped_memory.h" +#include "xenia/gpu/packet_disassembler.h" +#include "xenia/gpu/trace_protocol.h" +#include "xenia/memory.h" + +namespace xe { +namespace gpu { + +bool TraceReader::Open(const std::wstring& path) { + Close(); + + mmap_ = MappedMemory::Open(path, MappedMemory::Mode::kRead); + if (!mmap_) { + return false; + } + + trace_data_ = reinterpret_cast(mmap_->data()); + trace_size_ = mmap_->size(); + + ParseTrace(); + + return true; +} + +void TraceReader::Close() { + mmap_.reset(); + trace_data_ = nullptr; + trace_size_ = 0; +} + +void TraceReader::ParseTrace() { + auto trace_ptr = trace_data_; + Frame current_frame = { + trace_ptr, nullptr, 0, + }; + const PacketStartCommand* packet_start = nullptr; + const uint8_t* packet_start_ptr = nullptr; + const uint8_t* last_ptr = trace_ptr; + bool pending_break = false; + while (trace_ptr < trace_data_ + trace_size_) { + ++current_frame.command_count; + auto type = static_cast(xe::load(trace_ptr)); + switch (type) { + case TraceCommandType::kPrimaryBufferStart: { + auto cmd = + reinterpret_cast(trace_ptr); + trace_ptr += sizeof(*cmd) + cmd->count * 4; + break; + } + case TraceCommandType::kPrimaryBufferEnd: { + auto cmd = reinterpret_cast(trace_ptr); + trace_ptr += sizeof(*cmd); + break; + } + case TraceCommandType::kIndirectBufferStart: { + auto cmd = + reinterpret_cast(trace_ptr); + trace_ptr += sizeof(*cmd) + cmd->count * 4; + break; + } + case TraceCommandType::kIndirectBufferEnd: { + auto cmd = reinterpret_cast(trace_ptr); + trace_ptr += sizeof(*cmd); + break; + } + case TraceCommandType::kPacketStart: { + auto cmd = reinterpret_cast(trace_ptr); + packet_start_ptr = trace_ptr; + packet_start = cmd; + trace_ptr += sizeof(*cmd) + cmd->count * 4; + break; + } + case TraceCommandType::kPacketEnd: { + auto cmd = reinterpret_cast(trace_ptr); + trace_ptr += sizeof(*cmd); + if (!packet_start_ptr) { + continue; + } + auto packet_category = PacketDisassembler::GetPacketCategory( + packet_start_ptr + sizeof(*packet_start)); + switch (packet_category) { + case PacketCategory::kDraw: { + Frame::Command command; + command.type = Frame::Command::Type::kDraw; + command.head_ptr = packet_start_ptr; + command.start_ptr = last_ptr; + command.end_ptr = trace_ptr; + current_frame.commands.push_back(std::move(command)); + last_ptr = trace_ptr; + break; + } + case PacketCategory::kSwap: { + // + break; + } + } + if (pending_break) { + current_frame.end_ptr = trace_ptr; + frames_.push_back(std::move(current_frame)); + current_frame.start_ptr = trace_ptr; + current_frame.end_ptr = nullptr; + current_frame.command_count = 0; + pending_break = false; + } + break; + } + case TraceCommandType::kMemoryRead: { + auto cmd = reinterpret_cast(trace_ptr); + trace_ptr += sizeof(*cmd) + cmd->length; + break; + } + case TraceCommandType::kMemoryWrite: { + auto cmd = reinterpret_cast(trace_ptr); + trace_ptr += sizeof(*cmd) + cmd->length; + break; + } + case TraceCommandType::kEvent: { + auto cmd = reinterpret_cast(trace_ptr); + trace_ptr += sizeof(*cmd); + switch (cmd->event_type) { + case EventType::kSwap: { + pending_break = true; + break; + } + } + break; + } + default: + // Broken trace file? + assert_unhandled_case(type); + break; + } + } + if (pending_break || current_frame.command_count) { + current_frame.end_ptr = trace_ptr; + frames_.push_back(std::move(current_frame)); + } +} + +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/trace_reader.h b/src/xenia/gpu/trace_reader.h new file mode 100644 index 000000000..8b5798772 --- /dev/null +++ b/src/xenia/gpu/trace_reader.h @@ -0,0 +1,102 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2015 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_GPU_TRACE_READER_H_ +#define XENIA_GPU_TRACE_READER_H_ + +#include + +#include "xenia/base/mapped_memory.h" +#include "xenia/gpu/trace_protocol.h" +#include "xenia/memory.h" + +namespace xe { +namespace gpu { + +// void Foo() { +// auto trace_ptr = trace_data; +// while (trace_ptr < trace_data + trace_size) { +// auto cmd_type = *reinterpret_cast(trace_ptr); +// switch (cmd_type) { +// case TraceCommandType::kPrimaryBufferStart: +// break; +// case TraceCommandType::kPrimaryBufferEnd: +// break; +// case TraceCommandType::kIndirectBufferStart: +// break; +// case TraceCommandType::kIndirectBufferEnd: +// break; +// case TraceCommandType::kPacketStart: +// break; +// case TraceCommandType::kPacketEnd: +// break; +// case TraceCommandType::kMemoryRead: +// break; +// case TraceCommandType::kMemoryWrite: +// break; +// case TraceCommandType::kEvent: +// break; +// } +// /*trace_ptr = graphics_system->PlayTrace( +// trace_ptr, trace_size - (trace_ptr - trace_data), +// GraphicsSystem::TracePlaybackMode::kBreakOnSwap);*/ +// } +//} + +class TraceReader { + public: + struct Frame { + struct Command { + enum class Type { + kDraw, + kSwap, + }; + const uint8_t* head_ptr; + const uint8_t* start_ptr; + const uint8_t* end_ptr; + Type type; + union { + struct { + // + } draw; + struct { + // + } swap; + }; + }; + + const uint8_t* start_ptr; + const uint8_t* end_ptr; + int command_count; + std::vector commands; + }; + + TraceReader() = default; + virtual ~TraceReader() = default; + + const Frame* frame(int n) const { return &frames_[n]; } + int frame_count() const { return int(frames_.size()); } + + bool Open(const std::wstring& path); + + void Close(); + + protected: + void ParseTrace(); + + std::unique_ptr mmap_; + const uint8_t* trace_data_ = nullptr; + size_t trace_size_ = 0; + std::vector frames_; +}; + +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_TRACE_READER_H_ diff --git a/src/xenia/gpu/trace_writer.cc b/src/xenia/gpu/trace_writer.cc new file mode 100644 index 000000000..ada89f379 --- /dev/null +++ b/src/xenia/gpu/trace_writer.cc @@ -0,0 +1,141 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2015 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include "xenia/gpu/trace_writer.h" + +#include "xenia/base/string.h" + +namespace xe { +namespace gpu { + +TraceWriter::TraceWriter(uint8_t* membase) + : membase_(membase), file_(nullptr) {} + +TraceWriter::~TraceWriter() = default; + +bool TraceWriter::Open(const std::wstring& path) { + Close(); + + auto canonical_path = xe::to_absolute_path(path); + auto base_path = xe::find_base_path(canonical_path); + xe::filesystem::CreateFolder(base_path); + + file_ = xe::filesystem::OpenFile(canonical_path, "wb"); + return file_ != nullptr; +} + +void TraceWriter::Flush() { + if (file_) { + fflush(file_); + } +} + +void TraceWriter::Close() { + if (file_) { + fflush(file_); + fclose(file_); + file_ = nullptr; + } +} + +void TraceWriter::WritePrimaryBufferStart(uint32_t base_ptr, uint32_t count) { + if (!file_) { + return; + } + auto cmd = PrimaryBufferStartCommand({ + TraceCommandType::kPrimaryBufferStart, base_ptr, 0, + }); + fwrite(&cmd, 1, sizeof(cmd), file_); +} + +void TraceWriter::WritePrimaryBufferEnd() { + if (!file_) { + return; + } + auto cmd = PrimaryBufferEndCommand({ + TraceCommandType::kPrimaryBufferEnd, + }); + fwrite(&cmd, 1, sizeof(cmd), file_); +} + +void TraceWriter::WriteIndirectBufferStart(uint32_t base_ptr, uint32_t count) { + if (!file_) { + return; + } + auto cmd = IndirectBufferStartCommand({ + TraceCommandType::kIndirectBufferStart, base_ptr, 0, + }); + fwrite(&cmd, 1, sizeof(cmd), file_); +} + +void TraceWriter::WriteIndirectBufferEnd() { + if (!file_) { + return; + } + auto cmd = IndirectBufferEndCommand({ + TraceCommandType::kIndirectBufferEnd, + }); + fwrite(&cmd, 1, sizeof(cmd), file_); +} + +void TraceWriter::WritePacketStart(uint32_t base_ptr, uint32_t count) { + if (!file_) { + return; + } + auto cmd = PacketStartCommand({ + TraceCommandType::kPacketStart, base_ptr, count, + }); + fwrite(&cmd, 1, sizeof(cmd), file_); + fwrite(membase_ + base_ptr, 4, count, file_); +} + +void TraceWriter::WritePacketEnd() { + if (!file_) { + return; + } + auto cmd = PacketEndCommand({ + TraceCommandType::kPacketEnd, + }); + fwrite(&cmd, 1, sizeof(cmd), file_); +} + +void TraceWriter::WriteMemoryRead(uint32_t base_ptr, size_t length) { + if (!file_) { + return; + } + auto cmd = MemoryReadCommand({ + TraceCommandType::kMemoryRead, base_ptr, uint32_t(length), + }); + fwrite(&cmd, 1, sizeof(cmd), file_); + fwrite(membase_ + base_ptr, 1, length, file_); +} + +void TraceWriter::WriteMemoryWrite(uint32_t base_ptr, size_t length) { + if (!file_) { + return; + } + auto cmd = MemoryWriteCommand({ + TraceCommandType::kMemoryWrite, base_ptr, uint32_t(length), + }); + fwrite(&cmd, 1, sizeof(cmd), file_); + fwrite(membase_ + base_ptr, 1, length, file_); +} + +void TraceWriter::WriteEvent(EventType event_type) { + if (!file_) { + return; + } + auto cmd = EventCommand({ + TraceCommandType::kEvent, event_type, + }); + fwrite(&cmd, 1, sizeof(cmd), file_); +} + +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/trace_writer.h b/src/xenia/gpu/trace_writer.h new file mode 100644 index 000000000..915c18e65 --- /dev/null +++ b/src/xenia/gpu/trace_writer.h @@ -0,0 +1,50 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2015 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#ifndef XENIA_GPU_TRACE_WRITER_H_ +#define XENIA_GPU_TRACE_WRITER_H_ + +#include + +#include "xenia/base/filesystem.h" +#include "xenia/gpu/trace_protocol.h" + +namespace xe { +namespace gpu { + +class TraceWriter { + public: + explicit TraceWriter(uint8_t* membase); + ~TraceWriter(); + + bool is_open() const { return file_ != nullptr; } + + bool Open(const std::wstring& path); + void Flush(); + void Close(); + + void WritePrimaryBufferStart(uint32_t base_ptr, uint32_t count); + void WritePrimaryBufferEnd(); + void WriteIndirectBufferStart(uint32_t base_ptr, uint32_t count); + void WriteIndirectBufferEnd(); + void WritePacketStart(uint32_t base_ptr, uint32_t count); + void WritePacketEnd(); + void WriteMemoryRead(uint32_t base_ptr, size_t length); + void WriteMemoryWrite(uint32_t base_ptr, size_t length); + void WriteEvent(EventType event_type); + + private: + uint8_t* membase_; + FILE* file_; +}; + +} // namespace gpu +} // namespace xe + +#endif // XENIA_GPU_TRACE_WRITER_H_ diff --git a/src/xenia/gpu/tracing.cc b/src/xenia/gpu/tracing.cc deleted file mode 100644 index a387337ea..000000000 --- a/src/xenia/gpu/tracing.cc +++ /dev/null @@ -1,49 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2015 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#include "xenia/gpu/tracing.h" - -#include "xenia/base/filesystem.h" -#include "xenia/base/string.h" - -namespace xe { -namespace gpu { - -TraceWriter::TraceWriter(uint8_t* membase) - : membase_(membase), file_(nullptr) {} - -TraceWriter::~TraceWriter() = default; - -bool TraceWriter::Open(const std::wstring& path) { - Close(); - - auto canonical_path = xe::to_absolute_path(path); - auto base_path = xe::find_base_path(canonical_path); - xe::filesystem::CreateFolder(base_path); - - file_ = xe::filesystem::OpenFile(canonical_path, "wb"); - return file_ != nullptr; -} - -void TraceWriter::Flush() { - if (file_) { - fflush(file_); - } -} - -void TraceWriter::Close() { - if (file_) { - fflush(file_); - fclose(file_); - file_ = nullptr; - } -} - -} // namespace gpu -} // namespace xe diff --git a/src/xenia/gpu/tracing.h b/src/xenia/gpu/tracing.h deleted file mode 100644 index 2f63ae096..000000000 --- a/src/xenia/gpu/tracing.h +++ /dev/null @@ -1,195 +0,0 @@ -/** - ****************************************************************************** - * Xenia : Xbox 360 Emulator Research Project * - ****************************************************************************** - * Copyright 2015 Ben Vanik. All rights reserved. * - * Released under the BSD license - see LICENSE in the root for more details. * - ****************************************************************************** - */ - -#ifndef XENIA_GPU_TRACING_H_ -#define XENIA_GPU_TRACING_H_ - -#include - -#include "xenia/memory.h" - -namespace xe { -namespace gpu { - -enum class TraceCommandType : uint32_t { - kPrimaryBufferStart, - kPrimaryBufferEnd, - kIndirectBufferStart, - kIndirectBufferEnd, - kPacketStart, - kPacketEnd, - kMemoryRead, - kMemoryWrite, - kEvent, -}; - -struct PrimaryBufferStartCommand { - TraceCommandType type; - uint32_t base_ptr; - uint32_t count; -}; - -struct PrimaryBufferEndCommand { - TraceCommandType type; -}; - -struct IndirectBufferStartCommand { - TraceCommandType type; - uint32_t base_ptr; - uint32_t count; -}; - -struct IndirectBufferEndCommand { - TraceCommandType type; -}; - -struct PacketStartCommand { - TraceCommandType type; - uint32_t base_ptr; - uint32_t count; -}; - -struct PacketEndCommand { - TraceCommandType type; -}; - -struct MemoryReadCommand { - TraceCommandType type; - uint32_t base_ptr; - uint32_t length; -}; - -struct MemoryWriteCommand { - TraceCommandType type; - uint32_t base_ptr; - uint32_t length; -}; - -enum class EventType { - kSwap, -}; - -struct EventCommand { - TraceCommandType type; - EventType event_type; -}; - -class TraceWriter { - public: - explicit TraceWriter(uint8_t* membase); - ~TraceWriter(); - - bool is_open() const { return file_ != nullptr; } - - bool Open(const std::wstring& path); - void Flush(); - void Close(); - - void WritePrimaryBufferStart(uint32_t base_ptr, uint32_t count) { - if (!file_) { - return; - } - auto cmd = PrimaryBufferStartCommand({ - TraceCommandType::kPrimaryBufferStart, base_ptr, 0, - }); - fwrite(&cmd, 1, sizeof(cmd), file_); - } - - void WritePrimaryBufferEnd() { - if (!file_) { - return; - } - auto cmd = PrimaryBufferEndCommand({ - TraceCommandType::kPrimaryBufferEnd, - }); - fwrite(&cmd, 1, sizeof(cmd), file_); - } - - void WriteIndirectBufferStart(uint32_t base_ptr, uint32_t count) { - if (!file_) { - return; - } - auto cmd = IndirectBufferStartCommand({ - TraceCommandType::kIndirectBufferStart, base_ptr, 0, - }); - fwrite(&cmd, 1, sizeof(cmd), file_); - } - - void WriteIndirectBufferEnd() { - if (!file_) { - return; - } - auto cmd = IndirectBufferEndCommand({ - TraceCommandType::kIndirectBufferEnd, - }); - fwrite(&cmd, 1, sizeof(cmd), file_); - } - - void WritePacketStart(uint32_t base_ptr, uint32_t count) { - if (!file_) { - return; - } - auto cmd = PacketStartCommand({ - TraceCommandType::kPacketStart, base_ptr, count, - }); - fwrite(&cmd, 1, sizeof(cmd), file_); - fwrite(membase_ + base_ptr, 4, count, file_); - } - - void WritePacketEnd() { - if (!file_) { - return; - } - auto cmd = PacketEndCommand({ - TraceCommandType::kPacketEnd, - }); - fwrite(&cmd, 1, sizeof(cmd), file_); - } - - void WriteMemoryRead(uint32_t base_ptr, size_t length) { - if (!file_) { - return; - } - auto cmd = MemoryReadCommand({ - TraceCommandType::kMemoryRead, base_ptr, uint32_t(length), - }); - fwrite(&cmd, 1, sizeof(cmd), file_); - fwrite(membase_ + base_ptr, 1, length, file_); - } - - void WriteMemoryWrite(uint32_t base_ptr, size_t length) { - if (!file_) { - return; - } - auto cmd = MemoryWriteCommand({ - TraceCommandType::kMemoryWrite, base_ptr, uint32_t(length), - }); - fwrite(&cmd, 1, sizeof(cmd), file_); - fwrite(membase_ + base_ptr, 1, length, file_); - } - - void WriteEvent(EventType event_type) { - if (!file_) { - return; - } - auto cmd = EventCommand({ - TraceCommandType::kEvent, event_type, - }); - fwrite(&cmd, 1, sizeof(cmd), file_); - } - - private: - uint8_t* membase_; - FILE* file_; -}; - -} // namespace gpu -} // namespace xe - -#endif // XENIA_GPU_TRACING_H_ diff --git a/src/xenia/ui/gl/gl_immediate_drawer.cc b/src/xenia/ui/gl/gl_immediate_drawer.cc index d0ba4ae10..9cbdaf037 100644 --- a/src/xenia/ui/gl/gl_immediate_drawer.cc +++ b/src/xenia/ui/gl/gl_immediate_drawer.cc @@ -161,8 +161,6 @@ std::unique_ptr GLImmediateDrawer::CreateTexture( GraphicsContextLock lock(graphics_context_); auto texture = std::make_unique(width, height, filter, repeat); - glTextureStorage2D(static_cast(texture->handle), 1, GL_RGBA8, width, - height); if (data) { UpdateTexture(texture.get(), data); } @@ -188,8 +186,8 @@ void GLImmediateDrawer::Begin(int render_target_width, glEnablei(GL_BLEND, 0); glBlendEquationi(0, GL_FUNC_ADD); glBlendFunci(0, GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); - glDisablei(GL_DEPTH_TEST, 0); - glDisablei(GL_SCISSOR_TEST, 0); + glDisable(GL_DEPTH_TEST); + glDisable(GL_SCISSOR_TEST); // Prepare drawing resources. glUseProgram(program_); @@ -223,11 +221,11 @@ void GLImmediateDrawer::BeginDrawBatch(const ImmediateDrawBatch& batch) { void GLImmediateDrawer::Draw(const ImmediateDraw& draw) { if (draw.scissor) { - glEnablei(GL_SCISSOR_TEST, 0); + glEnable(GL_SCISSOR_TEST); glScissorIndexed(0, draw.scissor_rect[0], draw.scissor_rect[1], draw.scissor_rect[2], draw.scissor_rect[3]); } else { - glDisablei(GL_SCISSOR_TEST, 0); + glDisable(GL_SCISSOR_TEST); } if (draw.texture_handle) { @@ -261,7 +259,7 @@ void GLImmediateDrawer::EndDrawBatch() { glFlush(); } void GLImmediateDrawer::End() { // Restore modified state. - glDisablei(GL_SCISSOR_TEST, 0); + glDisable(GL_SCISSOR_TEST); glBindTextureUnit(0, 0); glUseProgram(0); glBindVertexArray(0);