diff --git a/src/poly/ui/control.h b/src/poly/ui/control.h index 197fb2b51..d96cf4629 100644 --- a/src/poly/ui/control.h +++ b/src/poly/ui/control.h @@ -45,6 +45,7 @@ class Control { virtual void ResizeToFill(int32_t pad_left, int32_t pad_top, int32_t pad_right, int32_t pad_bottom) = 0; void Layout(); + virtual void Invalidate() {} // TODO(benvanik): colors/brushes/etc. // TODO(benvanik): fonts. diff --git a/src/poly/ui/win32/win32_control.cc b/src/poly/ui/win32/win32_control.cc index 72fa6de76..c4f888cba 100644 --- a/src/poly/ui/win32/win32_control.cc +++ b/src/poly/ui/win32/win32_control.cc @@ -114,10 +114,19 @@ void Win32Control::OnResize(UIEvent& e) { for (auto& child_control : children_) { auto win32_control = static_cast(child_control.get()); win32_control->OnResize(e); + win32_control->Invalidate(); } } } +void Win32Control::Invalidate() { + InvalidateRect(hwnd_, nullptr, FALSE); + for (auto& child_control : children_) { + auto win32_control = static_cast(child_control.get()); + win32_control->Invalidate(); + } +} + void Win32Control::set_cursor_visible(bool value) { if (is_cursor_visible_ == value) { return; diff --git a/src/poly/ui/win32/win32_control.h b/src/poly/ui/win32/win32_control.h index 52a43cf78..baef5ae4f 100644 --- a/src/poly/ui/win32/win32_control.h +++ b/src/poly/ui/win32/win32_control.h @@ -33,6 +33,7 @@ class Win32Control : public Control { int32_t bottom) override; void ResizeToFill(int32_t pad_left, int32_t pad_top, int32_t pad_right, int32_t pad_bottom) override; + void Invalidate() override; void set_cursor_visible(bool value) override; void set_enabled(bool value) override; diff --git a/src/xenia/gpu/gl4/command_processor.cc b/src/xenia/gpu/gl4/command_processor.cc new file mode 100644 index 000000000..6879d2c22 --- /dev/null +++ b/src/xenia/gpu/gl4/command_processor.cc @@ -0,0 +1,940 @@ +/** +****************************************************************************** +* Xenia : Xbox 360 Emulator Research Project * 
+****************************************************************************** +* Copyright 2014 Ben Vanik. All rights reserved. * +* Released under the BSD license - see LICENSE in the root for more details. * +****************************************************************************** +*/ + +#include + +#include + +#include +#include +#include +#include + +#define XETRACECP(fmt, ...) \ + if (FLAGS_trace_ring_buffer) XELOGGPU(fmt, ##__VA_ARGS__) + +namespace xe { +namespace gpu { +namespace gl4 { + +using namespace xe::gpu::xenos; + +CommandProcessor::CommandProcessor(GL4GraphicsSystem* graphics_system) + : memory_(graphics_system->memory()), + membase_(graphics_system->memory()->membase()), + graphics_system_(graphics_system), + register_file_(graphics_system_->register_file()), + worker_running_(true), + time_base_(0), + counter_(0), + primary_buffer_ptr_(0), + primary_buffer_size_(0), + read_ptr_index_(0), + read_ptr_update_freq_(0), + read_ptr_writeback_ptr_(0), + write_ptr_index_event_(CreateEvent(NULL, FALSE, FALSE, NULL)), + write_ptr_index_(0) { + LARGE_INTEGER perf_counter; + QueryPerformanceCounter(&perf_counter); + time_base_ = perf_counter.QuadPart; +} + +CommandProcessor::~CommandProcessor() { CloseHandle(write_ptr_index_event_); } + +uint64_t CommandProcessor::QueryTime() { + LARGE_INTEGER perf_counter; + QueryPerformanceCounter(&perf_counter); + return perf_counter.QuadPart - time_base_; +} + +void CommandProcessor::Initialize(uint32_t ptr, uint32_t page_count) { + primary_buffer_ptr_ = ptr; + // Not sure this is correct, but it's a way to take the page_count back to + // the number of bytes allocated by the physical alloc. 
+  uint32_t original_size = 1 << (0x1C - page_count - 1);
+  primary_buffer_size_ = original_size;
+  read_ptr_index_ = 0;
+
+  worker_running_ = true;
+  worker_thread_ = std::thread([this]() {
+    poly::threading::set_name("GL4 Worker");
+    xe::Profiler::ThreadEnter("GL4 Worker");
+    WorkerMain();
+    xe::Profiler::ThreadExit();
+  });
+}
+
+// Asks the worker thread to stop and blocks until it has exited.
+// Safe to call more than once, or when the worker was never started.
+void CommandProcessor::Shutdown() {
+  worker_running_ = false;
+  // Wake the worker in case it is parked waiting for a write pointer update.
+  SetEvent(write_ptr_index_event_);
+  if (worker_thread_.joinable()) {
+    worker_thread_.join();
+  }
+}
+
+// Worker thread body. Waits for the write pointer to move away from the read
+// pointer, executes the newly written part of the primary ring buffer, then
+// publishes the new read pointer. Runs until Shutdown() clears
+// worker_running_.
+void CommandProcessor::WorkerMain() {
+  while (worker_running_) {
+    uint32_t write_ptr_index = write_ptr_index_.load();
+    // Also watch worker_running_ here; otherwise an idle ring buffer would
+    // keep this loop spinning forever and Shutdown() would never return.
+    while (worker_running_ && (write_ptr_index == 0xBAADF00D ||
+                               read_ptr_index_ == write_ptr_index)) {
+      // Check if the pointer has moved.
+      // We wait a short bit here to yield time. Since we are also running the
+      // main window display we don't want to pause too long, though.
+      const int wait_time_ms = 5;
+      WaitForSingleObject(write_ptr_index_event_, wait_time_ms);
+      // Reload on both timeout and signal: a signal means
+      // UpdateWritePointer() (or Shutdown()) just ran, so the cached value
+      // is stale.
+      write_ptr_index = write_ptr_index_.load();
+    }
+    if (write_ptr_index == 0xBAADF00D ||
+        read_ptr_index_ == write_ptr_index) {
+      // The wait loop ended because of Shutdown() with nothing left to run.
+      break;
+    }
+    assert_true(read_ptr_index_ != write_ptr_index);
+
+    // Process the new commands.
+    XETRACECP("Command processor thread work");
+
+    // Execute. Note that we handle wraparound transparently.
+    ExecutePrimaryBuffer(read_ptr_index_, write_ptr_index);
+    read_ptr_index_ = write_ptr_index;
+
+    // TODO(benvanik): use reader->Read_update_freq_ and only issue after moving
+    // that many indices.
+    if (read_ptr_writeback_ptr_) {
+      poly::store_and_swap(membase_ + read_ptr_writeback_ptr_,
+                           read_ptr_index_);
+    }
+  }
+}
+
+void CommandProcessor::EnableReadPointerWriteBack(uint32_t ptr,
+                                                  uint32_t block_size) {
+  // CP_RB_RPTR_ADDR Ring Buffer Read Pointer Address 0x70C
+  // ptr = RB_RPTR_ADDR, pointer to write back the address to.
+  read_ptr_writeback_ptr_ = (primary_buffer_ptr_ & ~0x1FFFFFFF) + ptr;
+  // CP_RB_CNTL Ring Buffer Control 0x704
+  // block_size = RB_BLKSZ, number of quadwords read between updates of the
+  // read pointer.
+  // 2^block_size, divided by 4. Done with an integer shift instead of pow();
+  // exponents that do not fit in 32 bits (undefined in the old double ->
+  // uint32_t cast) yield 0.
+  read_ptr_update_freq_ = block_size < 32 ? (1u << block_size) / 4 : 0;
+}
+
+// Stores the new ring buffer write index and wakes the worker thread.
+void CommandProcessor::UpdateWritePointer(uint32_t value) {
+  write_ptr_index_ = value;
+  SetEvent(write_ptr_index_event_);
+}
+
+// Writes |value| into register |index| of the register file and applies the
+// side effects tied to specific registers (COHER dirty flag, scratch register
+// writeback to memory). Out-of-range indices are ignored.
+void CommandProcessor::WriteRegister(uint32_t packet_ptr, uint32_t index,
+                                     uint32_t value) {
+  RegisterFile* regs = register_file_;
+  assert_true(index < RegisterFile::kRegisterCount);
+  if (index >= RegisterFile::kRegisterCount) {
+    // The index is decoded from the command stream; when asserts are compiled
+    // out, refuse to write past the register array instead of corrupting
+    // memory.
+    return;
+  }
+  regs->values[index].u32 = value;
+
+  // If this is a COHER register, set the dirty flag.
+  // This will block the command processor the next time it WAIT_MEM_REGs and
+  // allow us to synchronize the memory.
+  if (index == XE_GPU_REG_COHER_STATUS_HOST) {
+    regs->values[index].u32 |= 0x80000000ul;
+  }
+
+  // Scratch register writeback.
+  if (index >= XE_GPU_REG_SCRATCH_REG0 && index <= XE_GPU_REG_SCRATCH_REG7) {
+    uint32_t scratch_reg = index - XE_GPU_REG_SCRATCH_REG0;
+    if ((1 << scratch_reg) & regs->values[XE_GPU_REG_SCRATCH_UMSK].u32) {
+      // Enabled - write to address.
+      uint32_t scratch_addr = regs->values[XE_GPU_REG_SCRATCH_ADDR].u32;
+      uint32_t mem_addr = scratch_addr + (scratch_reg * 4);
+      poly::store_and_swap(
+          membase_ + xenos::GpuToCpu(primary_buffer_ptr_, mem_addr), value);
+    }
+  }
+}
+
+void CommandProcessor::MakeCoherent() {
+  // Status host often has 0x01000000 or 0x03000000.
+  // This is likely toggling VC (vertex cache) or TC (texture cache).
+  // Or, it also has a direction in here maybe - there is probably
+  // some way to check for dest coherency (what all the COHER_DEST_BASE_*
+  // registers are for).
+ // Best docs I've found on this are here: + // http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2013/10/R6xx_R7xx_3D.pdf + // http://cgit.freedesktop.org/xorg/driver/xf86-video-radeonhd/tree/src/r6xx_accel.c?id=3f8b6eccd9dba116cc4801e7f80ce21a879c67d2#n454 + + RegisterFile* regs = register_file_; + auto status_host = regs->values[XE_GPU_REG_COHER_STATUS_HOST].u32; + auto base_host = regs->values[XE_GPU_REG_COHER_BASE_HOST].u32; + auto size_host = regs->values[XE_GPU_REG_COHER_SIZE_HOST].u32; + + if (!(status_host & 0x80000000ul)) { + return; + } + + // TODO(benvanik): notify resource cache of base->size and type. + XETRACECP("Make %.8X -> %.8X (%db) coherent", base_host, + base_host + size_host, size_host); + + // Mark coherent. + status_host &= ~0x80000000ul; + regs->values[XE_GPU_REG_COHER_STATUS_HOST].u32 = status_host; +} + +class CommandProcessor::RingbufferReader { + public: + RingbufferReader(uint8_t* membase, uint32_t base_ptr, uint32_t ptr_mask, + uint32_t start_ptr, uint32_t end_ptr) + : membase_(membase), + base_ptr_(base_ptr), + ptr_mask_(ptr_mask), + start_ptr_(start_ptr), + end_ptr_(end_ptr), + ptr_(start_ptr) {} + + uint32_t ptr() const { return ptr_; } + uint32_t offset() const { return (ptr_ - start_ptr_) / sizeof(uint32_t); } + bool can_read() const { return ptr_ != end_ptr_; } + + uint32_t Peek() { return poly::load_and_swap(membase_ + ptr_); } + + void CheckRead(uint32_t words) { + assert_true(ptr_ + words * sizeof(uint32_t) <= end_ptr_); + } + + uint32_t Read() { + uint32_t value = poly::load_and_swap(membase_ + ptr_); + Advance(1); + return value; + } + + void Advance(uint32_t words) { + ptr_ = ptr_ + words * sizeof(uint32_t); + if (ptr_mask_) { + ptr_ = base_ptr_ + + (((ptr_ - base_ptr_) / sizeof(uint32_t)) & ptr_mask_) * + sizeof(uint32_t); + } + assert_true(ptr_ <= end_ptr_); + } + + void Skip(uint32_t words) { Advance(words); } + + void TraceData(uint32_t words) { + for (uint32_t i = 0; i < words; ++i) { + uint32_t i_ptr = ptr_ + i 
* sizeof(uint32_t); + XETRACECP("[%.8X] %.8X", i_ptr, + poly::load_and_swap(membase_ + i_ptr)); + } + } + + private: + uint8_t* membase_; + + uint32_t base_ptr_; + uint32_t ptr_mask_; + uint32_t start_ptr_; + uint32_t end_ptr_; + uint32_t ptr_; +}; + +void CommandProcessor::ExecutePrimaryBuffer(uint32_t start_index, + uint32_t end_index) { + SCOPE_profile_cpu_f("gpu"); + + // Adjust pointer base. + uint32_t start_ptr = primary_buffer_ptr_ + start_index * sizeof(uint32_t); + start_ptr = (primary_buffer_ptr_ & ~0x1FFFFFFF) | (start_ptr & 0x1FFFFFFF); + uint32_t end_ptr = primary_buffer_ptr_ + end_index * sizeof(uint32_t); + end_ptr = (primary_buffer_ptr_ & ~0x1FFFFFFF) | (end_ptr & 0x1FFFFFFF); + + XETRACECP("[%.8X] ExecutePrimaryBuffer(%dw -> %dw)", start_ptr, start_index, + end_index); + + // Execute commands! + uint32_t ptr_mask = (primary_buffer_size_ / sizeof(uint32_t)) - 1; + RingbufferReader reader(membase_, primary_buffer_ptr_, ptr_mask, start_ptr, + end_ptr); + while (reader.can_read()) { + ExecutePacket(&reader); + } + if (end_index > start_index) { + assert_true(reader.offset() == (end_index - start_index)); + } + + XETRACECP(" ExecutePrimaryBuffer End"); +} + +void CommandProcessor::ExecuteIndirectBuffer(uint32_t ptr, uint32_t length) { + XETRACECP("[%.8X] ExecuteIndirectBuffer(%dw)", ptr, length); + + // Execute commands! 
+  uint32_t ptr_mask = 0;
+  RingbufferReader reader(membase_, primary_buffer_ptr_, ptr_mask, ptr,
+                          ptr + length * sizeof(uint32_t));
+  while (reader.can_read()) {
+    ExecutePacket(&reader);
+  }
+
+  XETRACECP(" ExecuteIndirectBuffer End");
+}
+
+// Reads one packet header from |reader| and dispatches it to the handler for
+// its type (the top two bits of the header). An all-zero header is traced and
+// skipped. Returns the handler's result.
+bool CommandProcessor::ExecutePacket(RingbufferReader* reader) {
+  uint32_t packet_ptr = reader->ptr();
+  const uint32_t packet = reader->Read();
+  const uint32_t packet_type = packet >> 30;
+  if (packet == 0) {
+    XETRACECP("[%.8X] Packet(%.8X): 0?", packet_ptr, packet);
+    return true;
+  }
+
+  switch (packet_type) {
+    case 0x00:
+      return ExecutePacketType0(reader, packet_ptr, packet);
+    case 0x01:
+      return ExecutePacketType1(reader, packet_ptr, packet);
+    case 0x02:
+      return ExecutePacketType2(reader, packet_ptr, packet);
+    case 0x03:
+      return ExecutePacketType3(reader, packet_ptr, packet);
+  }
+
+  // packet_type is a 2-bit value, so every case is handled above. This only
+  // guarantees that no path flows off the end of a non-void function.
+  assert_always();
+  return false;
+}
+
+bool CommandProcessor::ExecutePacketType0(RingbufferReader* reader,
+                                          uint32_t packet_ptr,
+                                          uint32_t packet) {
+  // Type-0 packet.
+  // Write count registers in sequence to the registers starting at
+  // (base_index << 2).
+  XETRACECP("[%.8X] Packet(%.8X): set registers:", packet_ptr, packet);
+  uint32_t count = ((packet >> 16) & 0x3FFF) + 1;
+  uint32_t base_index = (packet & 0x7FFF);
+  // When bit 15 is set every data word goes to base_index; otherwise the
+  // target register advances with each word.
+  uint32_t write_one_reg = (packet >> 15) & 0x1;
+  for (uint32_t m = 0; m < count; m++) {
+    uint32_t reg_data = reader->Peek();
+    uint32_t target_index = write_one_reg ? base_index : base_index + m;
+    const char* reg_name = register_file_->GetRegisterName(target_index);
+    XETRACECP("[%.8X] %.8X -> %.4X %s", reader->ptr(), reg_data, target_index,
+              reg_name ? reg_name : "");
+    reader->Advance(1);
+    WriteRegister(packet_ptr, target_index, reg_data);
+  }
+  return true;
+}
+
+bool CommandProcessor::ExecutePacketType1(RingbufferReader* reader,
+                                          uint32_t packet_ptr,
+                                          uint32_t packet) {
+  // Type-1 packet.
+  // Contains two registers of data. Type-0 should be more common.
+ XETRACECP("[%.8X] Packet(%.8X): set registers:", packet_ptr, packet); + uint32_t reg_index_1 = packet & 0x7FF; + uint32_t reg_index_2 = (packet >> 11) & 0x7FF; + uint32_t reg_ptr_1 = reader->ptr(); + uint32_t reg_data_1 = reader->Read(); + uint32_t reg_ptr_2 = reader->ptr(); + uint32_t reg_data_2 = reader->Read(); + const char* reg_name_1 = register_file_->GetRegisterName(reg_index_1); + const char* reg_name_2 = register_file_->GetRegisterName(reg_index_2); + XETRACECP("[%.8X] %.8X -> %.4X %s", reg_ptr_1, reg_data_1, reg_index_1, + reg_name_1 ? reg_name_1 : ""); + XETRACECP("[%.8X] %.8X -> %.4X %s", reg_ptr_2, reg_data_2, reg_index_2, + reg_name_2 ? reg_name_2 : ""); + WriteRegister(packet_ptr, reg_index_1, reg_data_1); + WriteRegister(packet_ptr, reg_index_2, reg_data_2); + return true; +} + +bool CommandProcessor::ExecutePacketType2(RingbufferReader* reader, + uint32_t packet_ptr, + uint32_t packet) { + // Type-2 packet. + // No-op. Do nothing. + XETRACECP("[%.8X] Packet(%.8X): padding", packet_ptr, packet); + return true; +} + +bool CommandProcessor::ExecutePacketType3(RingbufferReader* reader, + uint32_t packet_ptr, + uint32_t packet) { + // Type-3 packet. + // & 1 == predicate, maybe? 
+ uint32_t opcode = (packet >> 8) & 0x7F; + uint32_t count = ((packet >> 16) & 0x3FFF) + 1; + auto data_start_offset = reader->offset(); + + bool result = false; + switch (opcode) { + case PM4_ME_INIT: + result = ExecutePacketType3_ME_INIT(reader, packet_ptr, packet, count); + break; + case PM4_NOP: + result = ExecutePacketType3_NOP(reader, packet_ptr, packet, count); + break; + case PM4_INTERRUPT: + result = ExecutePacketType3_INTERRUPT(reader, packet_ptr, packet, count); + break; + case PM4_XE_SWAP: + result = ExecutePacketType3_XE_SWAP(reader, packet_ptr, packet, count); + break; + case PM4_INDIRECT_BUFFER: + result = + ExecutePacketType3_INDIRECT_BUFFER(reader, packet_ptr, packet, count); + break; + case PM4_WAIT_REG_MEM: + result = + ExecutePacketType3_WAIT_REG_MEM(reader, packet_ptr, packet, count); + break; + case PM4_REG_RMW: + result = ExecutePacketType3_REG_RMW(reader, packet_ptr, packet, count); + break; + case PM4_COND_WRITE: + result = ExecutePacketType3_COND_WRITE(reader, packet_ptr, packet, count); + break; + case PM4_EVENT_WRITE: + result = + ExecutePacketType3_EVENT_WRITE(reader, packet_ptr, packet, count); + break; + case PM4_EVENT_WRITE_SHD: + result = + ExecutePacketType3_EVENT_WRITE_SHD(reader, packet_ptr, packet, count); + break; + case PM4_DRAW_INDX: + result = ExecutePacketType3_DRAW_INDX(reader, packet_ptr, packet, count); + break; + case PM4_DRAW_INDX_2: + result = + ExecutePacketType3_DRAW_INDX_2(reader, packet_ptr, packet, count); + break; + case PM4_SET_CONSTANT: + result = + ExecutePacketType3_SET_CONSTANT(reader, packet_ptr, packet, count); + break; + case PM4_LOAD_ALU_CONSTANT: + result = ExecutePacketType3_LOAD_ALU_CONSTANT(reader, packet_ptr, packet, + count); + break; + case PM4_IM_LOAD: + result = ExecutePacketType3_IM_LOAD(reader, packet_ptr, packet, count); + break; + case PM4_IM_LOAD_IMMEDIATE: + result = ExecutePacketType3_IM_LOAD_IMMEDIATE(reader, packet_ptr, packet, + count); + break; + case PM4_INVALIDATE_STATE: + result = 
ExecutePacketType3_INVALIDATE_STATE(reader, packet_ptr, packet, + count); + break; + + case PM4_SET_BIN_MASK_LO: { + uint32_t value = reader->Read(); + XETRACECP("[%.8X] Packet(%.8X): PM4_SET_BIN_MASK_LO = %.8X", packet_ptr, + packet, value); + result = true; + } break; + case PM4_SET_BIN_MASK_HI: { + uint32_t value = reader->Read(); + XETRACECP("[%.8X] Packet(%.8X): PM4_SET_BIN_MASK_HI = %.8X", packet_ptr, + packet, value); + result = true; + } break; + case PM4_SET_BIN_SELECT_LO: { + uint32_t value = reader->Read(); + XETRACECP("[%.8X] Packet(%.8X): PM4_SET_BIN_SELECT_LO = %.8X", packet_ptr, + packet, value); + result = true; + } break; + case PM4_SET_BIN_SELECT_HI: { + uint32_t value = reader->Read(); + XETRACECP("[%.8X] Packet(%.8X): PM4_SET_BIN_SELECT_HI = %.8X", packet_ptr, + packet, value); + result = true; + } break; + + // Ignored packets - useful if breaking on the default handler below. + case 0x50: // 0xC0015000 usually 2 words, 0xFFFFFFFF / 0x00000000 + XETRACECP("[%.8X] Packet(%.8X): unknown!", packet_ptr, packet); + reader->TraceData(count); + reader->Skip(count); + break; + + default: + XETRACECP("[%.8X] Packet(%.8X): unknown!", packet_ptr, packet); + reader->TraceData(count); + reader->Skip(count); + break; + } + + assert_true(reader->offset() == data_start_offset + count); + return result; +} + +bool CommandProcessor::ExecutePacketType3_ME_INIT(RingbufferReader* reader, + uint32_t packet_ptr, + uint32_t packet, + uint32_t count) { + // initialize CP's micro-engine + XETRACECP("[%.8X] Packet(%.8X): PM4_ME_INIT", packet_ptr, packet); + reader->TraceData(count); + reader->Advance(count); + return true; +} + +bool CommandProcessor::ExecutePacketType3_NOP(RingbufferReader* reader, + uint32_t packet_ptr, + uint32_t packet, uint32_t count) { + // skip N 32-bit words to get to the next packet + // No-op, ignore some data. 
+ XETRACECP("[%.8X] Packet(%.8X): PM4_NOP", packet_ptr, packet); + reader->TraceData(count); + reader->Advance(count); + return true; +} + +bool CommandProcessor::ExecutePacketType3_INTERRUPT(RingbufferReader* reader, + uint32_t packet_ptr, + uint32_t packet, + uint32_t count) { + // generate interrupt from the command stream + XETRACECP("[%.8X] Packet(%.8X): PM4_INTERRUPT", packet_ptr, packet); + reader->TraceData(count); + uint32_t cpu_mask = reader->Read(); + for (int n = 0; n < 6; n++) { + if (cpu_mask & (1 << n)) { + graphics_system_->DispatchInterruptCallback(1, n); + } + } + return true; +} + +bool CommandProcessor::ExecutePacketType3_XE_SWAP(RingbufferReader* reader, + uint32_t packet_ptr, + uint32_t packet, + uint32_t count) { + // Xenia-specific VdSwap hook. + // VdSwap will post this to tell us we need to swap the screen/fire an + // interrupt. + XETRACECP("[%.8X] Packet(%.8X): PM4_XE_SWAP", packet_ptr, packet); + reader->TraceData(count); + reader->Advance(count); + if (swap_handler_) { + swap_handler_(); + } + return true; +} + +bool CommandProcessor::ExecutePacketType3_INDIRECT_BUFFER( + RingbufferReader* reader, uint32_t packet_ptr, uint32_t packet, + uint32_t count) { + // indirect buffer dispatch + uint32_t list_ptr = reader->Read(); + uint32_t list_length = reader->Read(); + XETRACECP("[%.8X] Packet(%.8X): PM4_INDIRECT_BUFFER %.8X (%dw)", packet_ptr, + packet, list_ptr, list_length); + ExecuteIndirectBuffer(GpuToCpu(list_ptr), list_length); + return true; +} + +bool CommandProcessor::ExecutePacketType3_WAIT_REG_MEM(RingbufferReader* reader, + uint32_t packet_ptr, + uint32_t packet, + uint32_t count) { + // wait until a register or memory location is a specific value + XETRACECP("[%.8X] Packet(%.8X): PM4_WAIT_REG_MEM", packet_ptr, packet); + reader->TraceData(count); + uint32_t wait_info = reader->Read(); + uint32_t poll_reg_addr = reader->Read(); + uint32_t ref = reader->Read(); + uint32_t mask = reader->Read(); + uint32_t wait = reader->Read(); + 
bool matched = false;
+  // Poll the register or memory location until the comparison selected by
+  // wait_info succeeds.
+  // NOTE(review): comparison mode 0 ("Never") can never match, so this loop
+  // would not terminate for it - confirm that is intended.
+  do {
+    uint32_t value;
+    if (wait_info & 0x10) {
+      // Memory.
+      // The low two bits of poll_reg_addr select the endianness and the rest
+      // is the address. Derive the aligned address into a local rather than
+      // masking poll_reg_addr in place: this runs on every poll iteration,
+      // and clearing the bits in place dropped the endianness selector from
+      // the second iteration on.
+      auto endianness = static_cast(poll_reg_addr & 0x3);
+      const uint32_t poll_mem_addr = poll_reg_addr & ~0x3;
+      value =
+          poly::load(membase_ + GpuToCpu(packet_ptr, poll_mem_addr));
+      value = GpuSwap(value, endianness);
+    } else {
+      // Register.
+      assert_true(poll_reg_addr < RegisterFile::kRegisterCount);
+      value = register_file_->values[poll_reg_addr].u32;
+      if (poll_reg_addr == XE_GPU_REG_COHER_STATUS_HOST) {
+        // Polling the coherency status triggers the synchronization, then the
+        // register is re-read so the cleared dirty flag is observed.
+        MakeCoherent();
+        value = register_file_->values[poll_reg_addr].u32;
+      }
+    }
+    switch (wait_info & 0x7) {
+      case 0x0:  // Never.
+        matched = false;
+        break;
+      case 0x1:  // Less than reference.
+        matched = (value & mask) < ref;
+        break;
+      case 0x2:  // Less than or equal to reference.
+        matched = (value & mask) <= ref;
+        break;
+      case 0x3:  // Equal to reference.
+        matched = (value & mask) == ref;
+        break;
+      case 0x4:  // Not equal to reference.
+        matched = (value & mask) != ref;
+        break;
+      case 0x5:  // Greater than or equal to reference.
+        matched = (value & mask) >= ref;
+        break;
+      case 0x6:  // Greater than reference.
+        matched = (value & mask) > ref;
+        break;
+      case 0x7:  // Always
+        matched = true;
+        break;
+    }
+    if (!matched) {
+      // Wait.
+      if (wait >= 0x100) {
+        Sleep(wait / 0x100);
+      } else {
+        SwitchToThread();
+      }
+    }
+  } while (!matched);
+  return true;
+}
+
+bool CommandProcessor::ExecutePacketType3_REG_RMW(RingbufferReader* reader,
+                                                  uint32_t packet_ptr,
+                                                  uint32_t packet,
+                                                  uint32_t count) {
+  // register read/modify/write
+  // ?
(used during shader upload and edram setup) + XETRACECP("[%.8X] Packet(%.8X): PM4_REG_RMW", packet_ptr, packet); + reader->TraceData(count); + uint32_t rmw_info = reader->Read(); + uint32_t and_mask = reader->Read(); + uint32_t or_mask = reader->Read(); + uint32_t value = register_file_->values[rmw_info & 0x1FFF].u32; + if ((rmw_info >> 30) & 0x1) { + // | reg + value |= register_file_->values[or_mask & 0x1FFF].u32; + } else { + // | imm + value |= or_mask; + } + if ((rmw_info >> 31) & 0x1) { + // & reg + value &= register_file_->values[and_mask & 0x1FFF].u32; + } else { + // & imm + value &= and_mask; + } + WriteRegister(packet_ptr, rmw_info & 0x1FFF, value); + return true; +} + +bool CommandProcessor::ExecutePacketType3_COND_WRITE(RingbufferReader* reader, + uint32_t packet_ptr, + uint32_t packet, + uint32_t count) { + // conditional write to memory or register + XETRACECP("[%.8X] Packet(%.8X): PM4_COND_WRITE", packet_ptr, packet); + reader->TraceData(count); + uint32_t wait_info = reader->Read(); + uint32_t poll_reg_addr = reader->Read(); + uint32_t ref = reader->Read(); + uint32_t mask = reader->Read(); + uint32_t write_reg_addr = reader->Read(); + uint32_t write_data = reader->Read(); + uint32_t value; + if (wait_info & 0x10) { + // Memory. + auto endianness = static_cast(poll_reg_addr & 0x3); + poll_reg_addr &= ~0x3; + value = + poly::load(membase_ + GpuToCpu(packet_ptr, poll_reg_addr)); + value = GpuSwap(value, endianness); + } else { + // Register. + assert_true(poll_reg_addr < RegisterFile::kRegisterCount); + value = register_file_->values[poll_reg_addr].u32; + } + bool matched = false; + switch (wait_info & 0x7) { + case 0x0: // Never. + matched = false; + break; + case 0x1: // Less than reference. + matched = (value & mask) < ref; + break; + case 0x2: // Less than or equal to reference. + matched = (value & mask) <= ref; + break; + case 0x3: // Equal to reference. + matched = (value & mask) == ref; + break; + case 0x4: // Not equal to reference. 
+ matched = (value & mask) != ref; + break; + case 0x5: // Greater than or equal to reference. + matched = (value & mask) >= ref; + break; + case 0x6: // Greater than reference. + matched = (value & mask) > ref; + break; + case 0x7: // Always + matched = true; + break; + } + if (matched) { + // Write. + if (wait_info & 0x100) { + // Memory. + auto endianness = static_cast(write_reg_addr & 0x3); + write_reg_addr &= ~0x3; + write_data = GpuSwap(write_data, endianness); + poly::store(membase_ + GpuToCpu(packet_ptr, write_reg_addr), write_data); + } else { + // Register. + WriteRegister(packet_ptr, write_reg_addr, write_data); + } + } + return true; +} + +bool CommandProcessor::ExecutePacketType3_EVENT_WRITE(RingbufferReader* reader, + uint32_t packet_ptr, + uint32_t packet, + uint32_t count) { + // generate an event that creates a write to memory when completed + XETRACECP("[%.8X] Packet(%.8X): PM4_EVENT_WRITE (unimplemented!)", packet_ptr, + packet); + reader->TraceData(count); + uint32_t initiator = reader->Read(); + if (count == 1) { + // Just an event flag? Where does this write? + } else { + // Write to an address. + assert_always(); + reader->Advance(count - 1); + } + return true; +} + +bool CommandProcessor::ExecutePacketType3_EVENT_WRITE_SHD( + RingbufferReader* reader, uint32_t packet_ptr, uint32_t packet, + uint32_t count) { + // generate a VS|PS_done event + XETRACECP("[%.8X] Packet(%.8X): PM4_EVENT_WRITE_SHD", packet_ptr, packet); + reader->TraceData(count); + uint32_t initiator = reader->Read(); + uint32_t address = reader->Read(); + uint32_t value = reader->Read(); + // Writeback initiator. + WriteRegister(packet_ptr, XE_GPU_REG_VGT_EVENT_INITIATOR, initiator & 0x3F); + uint32_t data_value; + if ((initiator >> 31) & 0x1) { + // Write counter (GPU vblank counter?). + data_value = counter_; + } else { + // Write value. 
+ data_value = value; + } + auto endianness = static_cast(address & 0x3); + address &= ~0x3; + data_value = GpuSwap(data_value, endianness); + poly::store(membase_ + GpuToCpu(address), data_value); + return true; +} + +bool CommandProcessor::ExecutePacketType3_DRAW_INDX(RingbufferReader* reader, + uint32_t packet_ptr, + uint32_t packet, + uint32_t count) { + // initiate fetch of index buffer and draw + XETRACECP("[%.8X] Packet(%.8X): PM4_DRAW_INDX", packet_ptr, packet); + reader->TraceData(count); + // dword0 = viz query info + uint32_t dword0 = reader->Read(); + uint32_t dword1 = reader->Read(); + uint32_t index_count = dword1 >> 16; + auto prim_type = static_cast(dword1 & 0x3F); + uint32_t src_sel = (dword1 >> 6) & 0x3; + if (src_sel == 0x0) { + // Indexed draw. + uint32_t index_base = reader->Read(); + uint32_t index_size = reader->Read(); + auto endianness = static_cast(index_size >> 30); + index_size &= 0x00FFFFFF; + bool index_32bit = (dword1 >> 11) & 0x1; + index_size *= index_32bit ? 4 : 2; + } else if (src_sel == 0x2) { + // Auto draw. + } else { + // Unknown source select. + assert_always(); + } + // if (!driver_->PrepareDraw(draw_command_)) { + // draw_command_.prim_type = prim_type; + // draw_command_.start_index = 0; + // draw_command_.index_count = index_count; + // draw_command_.base_vertex = 0; + // if (src_sel == 0x0) { + // // Indexed draw. + // // TODO(benvanik): detect subregions of larger index + // buffers! + // driver_->PrepareDrawIndexBuffer( + // draw_command_, index_base, index_size, + // endianness, + // index_32bit ? INDEX_FORMAT_32BIT : INDEX_FORMAT_16BIT); + // } else if (src_sel == 0x2) { + // // Auto draw. + // draw_command_.index_buffer = nullptr; + // } else { + // // Unknown source select. 
+ // assert_always(); + // } + // driver_->Draw(draw_command_); + // } else { + // if (src_sel == 0x0) { + // reader->Advance(2); // skip + // } + // } + return true; +} + +bool CommandProcessor::ExecutePacketType3_DRAW_INDX_2(RingbufferReader* reader, + uint32_t packet_ptr, + uint32_t packet, + uint32_t count) { + // draw using supplied indices in packet + XETRACECP("[%.8X] Packet(%.8X): PM4_DRAW_INDX_2", packet_ptr, packet); + reader->TraceData(count); + uint32_t dword0 = reader->Read(); + uint32_t index_count = dword0 >> 16; + auto prim_type = static_cast(dword0 & 0x3F); + uint32_t src_sel = (dword0 >> 6) & 0x3; + assert_true(src_sel == 0x2); // 'SrcSel=AutoIndex' + bool index_32bit = (dword0 >> 11) & 0x1; + uint32_t indices_size = index_count * (index_32bit ? 4 : 2); + reader->CheckRead(indices_size / sizeof(uint32_t)); + /*if (!driver_->PrepareDraw(draw_command_)) { + draw_command_.prim_type = prim_type; + draw_command_.start_index = 0; + draw_command_.index_count = index_count; + draw_command_.base_vertex = 0; + draw_command_.index_buffer = nullptr; + driver_->Draw(draw_command_); + }*/ + reader->Advance(count - 1); + return true; +} + +bool CommandProcessor::ExecutePacketType3_SET_CONSTANT(RingbufferReader* reader, + uint32_t packet_ptr, + uint32_t packet, + uint32_t count) { + // load constant into chip and to memory + XETRACECP("[%.8X] Packet(%.8X): PM4_SET_CONSTANT", packet_ptr, packet); + // PM4_REG(reg) ((0x4 << 16) | (GSL_HAL_SUBBLOCK_OFFSET(reg))) + // reg - 0x2000 + uint32_t offset_type = reader->Read(); + uint32_t index = offset_type & 0x7FF; + uint32_t type = (offset_type >> 16) & 0xFF; + switch (type) { + case 0x4: // REGISTER + index += 0x2000; // registers + for (uint32_t n = 0; n < count - 1; n++, index++) { + uint32_t data = reader->Read(); + const char* reg_name = register_file_->GetRegisterName(index); + XETRACECP("[%.8X] %.8X -> %.4X %s", packet_ptr + (1 + n) * 4, data, + index, reg_name ? 
reg_name : "");
+        WriteRegister(packet_ptr, index, data);
+      }
+      break;
+    default:
+      assert_always();
+      // Unknown constant type: still consume the remaining count - 1 payload
+      // words (the first was read as offset_type) so the reader stays aligned
+      // with the next packet when asserts are compiled out.
+      reader->Advance(count - 1);
+      break;
+  }
+  return true;
+}
+
+// PM4_LOAD_ALU_CONSTANT: reads an address, a destination offset and a size
+// from the packet, then copies |size| dwords from memory at that address into
+// the register file starting at 0x4000 + offset.
+bool CommandProcessor::ExecutePacketType3_LOAD_ALU_CONSTANT(
+    RingbufferReader* reader, uint32_t packet_ptr, uint32_t packet,
+    uint32_t count) {
+  // load constants from memory
+  XETRACECP("[%.8X] Packet(%.8X): PM4_LOAD_ALU_CONSTANT", packet_ptr, packet);
+  uint32_t address = reader->Read();
+  address &= 0x3FFFFFFF;
+  uint32_t offset_type = reader->Read();
+  uint32_t index = offset_type & 0x7FF;
+  uint32_t size = reader->Read();
+  size &= 0xFFF;
+  index += 0x4000;  // alu constants
+  for (uint32_t n = 0; n < size; n++, index++) {
+    uint32_t data = poly::load_and_swap(
+        membase_ + GpuToCpu(packet_ptr, address + n * 4));
+    const char* reg_name = register_file_->GetRegisterName(index);
+    XETRACECP("[%.8X] %.8X -> %.4X %s", packet_ptr, data, index,
+              reg_name ? reg_name : "");
+    WriteRegister(packet_ptr, index, data);
+  }
+  return true;
+}
+
+// PM4_IM_LOAD: decodes the shader type/address word and the start/size word.
+// The actual shader load is still commented out, so the decoded values are
+// currently unused.
+bool CommandProcessor::ExecutePacketType3_IM_LOAD(RingbufferReader* reader,
+                                                  uint32_t packet_ptr,
+                                                  uint32_t packet,
+                                                  uint32_t count) {
+  // load sequencer instruction memory (pointer-based)
+  XETRACECP("[%.8X] Packet(%.8X): PM4_IM_LOAD", packet_ptr, packet);
+  reader->TraceData(count);
+  uint32_t addr_type = reader->Read();
+  auto shader_type = static_cast(addr_type & 0x3);
+  uint32_t addr = addr_type & ~0x3;
+  uint32_t start_size = reader->Read();
+  uint32_t start = start_size >> 16;
+  uint32_t size = start_size & 0xFFFF;  // dwords
+  assert_true(start == 0);
+  /*driver_->LoadShader(shader_type,
+                      GpuToCpu(packet_ptr, addr), size * 4, start);*/
+  return true;
+}
+
+bool CommandProcessor::ExecutePacketType3_IM_LOAD_IMMEDIATE(
+    RingbufferReader* reader, uint32_t packet_ptr, uint32_t packet,
+    uint32_t count) {
+  // load sequencer instruction memory (code embedded in packet)
+  XETRACECP("[%.8X] Packet(%.8X): PM4_IM_LOAD_IMMEDIATE", packet_ptr, packet);
+  reader->TraceData(count);
+  uint32_t
dword0 = reader->Read(); + uint32_t dword1 = reader->Read(); + auto shader_type = static_cast(dword0); + uint32_t start_size = dword1; + uint32_t start = start_size >> 16; + uint32_t size = start_size & 0xFFFF; // dwords + assert_true(start == 0); + // TODO(benvanik): figure out if this could wrap. + reader->CheckRead(size); + /*driver_->LoadShader(shader_type, reader->ptr(), size * 4, + start);*/ + reader->Advance(size); + return true; +} + +bool CommandProcessor::ExecutePacketType3_INVALIDATE_STATE( + RingbufferReader* reader, uint32_t packet_ptr, uint32_t packet, + uint32_t count) { + // selective invalidation of state pointers + XETRACECP("[%.8X] Packet(%.8X): PM4_INVALIDATE_STATE", packet_ptr, packet); + reader->TraceData(count); + uint32_t mask = reader->Read(); + // driver_->InvalidateState(mask); + return true; +} + +} // namespace gl4 +} // namespace gpu +} // namespace xe diff --git a/src/xenia/gpu/gl4/command_processor.h b/src/xenia/gpu/gl4/command_processor.h new file mode 100644 index 000000000..dfbd868fd --- /dev/null +++ b/src/xenia/gpu/gl4/command_processor.h @@ -0,0 +1,138 @@ +/** +****************************************************************************** +* Xenia : Xbox 360 Emulator Research Project * +****************************************************************************** +* Copyright 2014 Ben Vanik. All rights reserved. * +* Released under the BSD license - see LICENSE in the root for more details. 
*
+******************************************************************************
+*/
+
+#ifndef XENIA_GPU_GL4_COMMAND_PROCESSOR_H_
+#define XENIA_GPU_GL4_COMMAND_PROCESSOR_H_
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+namespace xe {
+namespace gpu {
+namespace gl4 {
+
+class GL4GraphicsSystem;
+
+// Processes ring buffer packets on behalf of a GL4GraphicsSystem. Besides the
+// ExecutePacket* handlers it keeps a time base, a counter that callers bump
+// through increment_counter(), and the ring buffer read/write pointer state.
+class CommandProcessor {
+ public:
+  // explicit: a GL4GraphicsSystem* must never silently convert into a
+  // processor.
+  explicit CommandProcessor(GL4GraphicsSystem* graphics_system);
+  ~CommandProcessor();
+
+  // Installs the swap callback; GL4GraphicsSystem::Setup() binds it to
+  // GL4GraphicsSystem::SwapHandler().
+  void set_swap_handler(std::function<void()> fn) { swap_handler_ = fn; }
+
+  // Performance-counter ticks elapsed since this object was constructed.
+  uint64_t QueryTime();
+  uint32_t counter() const { return counter_; }
+  void increment_counter() { counter_++; }
+
+  void Initialize(uint32_t ptr, uint32_t page_count);
+  void Shutdown();
+  void EnableReadPointerWriteBack(uint32_t ptr, uint32_t block_size);
+
+  void UpdateWritePointer(uint32_t value);
+
+ private:
+  class RingbufferReader;
+
+  void WorkerMain();
+
+  void WriteRegister(uint32_t packet_ptr, uint32_t index, uint32_t value);
+  void MakeCoherent();
+
+  void ExecutePrimaryBuffer(uint32_t start_index, uint32_t end_index);
+  void ExecuteIndirectBuffer(uint32_t ptr, uint32_t length);
+  bool ExecutePacket(RingbufferReader* reader);
+  bool ExecutePacketType0(RingbufferReader* reader, uint32_t packet_ptr,
+                          uint32_t packet);
+  bool ExecutePacketType1(RingbufferReader* reader, uint32_t packet_ptr,
+                          uint32_t packet);
+  bool ExecutePacketType2(RingbufferReader* reader, uint32_t packet_ptr,
+                          uint32_t packet);
+  bool ExecutePacketType3(RingbufferReader* reader, uint32_t packet_ptr,
+                          uint32_t packet);
+  bool ExecutePacketType3_ME_INIT(RingbufferReader* reader, uint32_t packet_ptr,
+                                  uint32_t packet, uint32_t count);
+  bool ExecutePacketType3_NOP(RingbufferReader* reader, uint32_t packet_ptr,
+                              uint32_t packet, uint32_t count);
+  bool ExecutePacketType3_INTERRUPT(RingbufferReader* reader,
+                                    uint32_t packet_ptr, uint32_t packet,
+                                    uint32_t count);
+  bool ExecutePacketType3_XE_SWAP(RingbufferReader* reader, uint32_t packet_ptr,
+                                  uint32_t packet, uint32_t count);
+  bool ExecutePacketType3_INDIRECT_BUFFER(RingbufferReader* reader,
+                                          uint32_t packet_ptr, uint32_t packet,
+                                          uint32_t count);
+  bool ExecutePacketType3_WAIT_REG_MEM(RingbufferReader* reader,
+                                       uint32_t packet_ptr, uint32_t packet,
+                                       uint32_t count);
+  bool ExecutePacketType3_REG_RMW(RingbufferReader* reader, uint32_t packet_ptr,
+                                  uint32_t packet, uint32_t count);
+  bool ExecutePacketType3_COND_WRITE(RingbufferReader* reader,
+                                     uint32_t packet_ptr, uint32_t packet,
+                                     uint32_t count);
+  bool ExecutePacketType3_EVENT_WRITE(RingbufferReader* reader,
+                                      uint32_t packet_ptr, uint32_t packet,
+                                      uint32_t count);
+  bool ExecutePacketType3_EVENT_WRITE_SHD(RingbufferReader* reader,
+                                          uint32_t packet_ptr, uint32_t packet,
+                                          uint32_t count);
+  bool ExecutePacketType3_DRAW_INDX(RingbufferReader* reader,
+                                    uint32_t packet_ptr, uint32_t packet,
+                                    uint32_t count);
+  bool ExecutePacketType3_DRAW_INDX_2(RingbufferReader* reader,
+                                      uint32_t packet_ptr, uint32_t packet,
+                                      uint32_t count);
+  bool ExecutePacketType3_SET_CONSTANT(RingbufferReader* reader,
+                                       uint32_t packet_ptr, uint32_t packet,
+                                       uint32_t count);
+  bool ExecutePacketType3_LOAD_ALU_CONSTANT(RingbufferReader* reader,
+                                            uint32_t packet_ptr,
+                                            uint32_t packet, uint32_t count);
+  bool ExecutePacketType3_IM_LOAD(RingbufferReader* reader, uint32_t packet_ptr,
+                                  uint32_t packet, uint32_t count);
+  bool ExecutePacketType3_IM_LOAD_IMMEDIATE(RingbufferReader* reader,
+                                            uint32_t packet_ptr,
+                                            uint32_t packet, uint32_t count);
+  bool ExecutePacketType3_INVALIDATE_STATE(RingbufferReader* reader,
+                                           uint32_t packet_ptr, uint32_t packet,
+                                           uint32_t count);
+
+  Memory* memory_;
+  uint8_t* membase_;
+  GL4GraphicsSystem* graphics_system_;
+  RegisterFile* register_file_;
+
+  std::thread worker_thread_;
+  std::atomic<bool> worker_running_;
+
+  std::function<void()> swap_handler_;
+
+  uint64_t time_base_;
+  // Atomic because increment_counter() is reached from
+  // GL4GraphicsSystem::MarkVblank(), which runs both on the vsync timer
+  // callback and from the swap handler, so increments can overlap.
+  std::atomic<uint32_t> counter_;
+
+  uint32_t primary_buffer_ptr_;
+  uint32_t primary_buffer_size_;
+
+  uint32_t read_ptr_index_;
+  uint32_t read_ptr_update_freq_;
+  uint32_t read_ptr_writeback_ptr_;
+
+  HANDLE write_ptr_index_event_;
+  std::atomic<uint32_t> write_ptr_index_;
+};
+
+}  // namespace gl4
+}  // namespace gpu
+}  // namespace xe
+
+#endif  // XENIA_GPU_GL4_COMMAND_PROCESSOR_H_
diff --git a/src/xenia/gpu/gl4/gl4_graphics_system.cc b/src/xenia/gpu/gl4/gl4_graphics_system.cc
index 9ed7f01c9..fbff321ff 100644
--- a/src/xenia/gpu/gl4/gl4_graphics_system.cc
+++ b/src/xenia/gpu/gl4/gl4_graphics_system.cc
@@ -9,26 +9,148 @@
 
 #include
 
+#include
+#include
+#include
+
 namespace xe {
 namespace gpu {
 namespace gl4 {
 
 GL4GraphicsSystem::GL4GraphicsSystem(Emulator* emulator)
-    : GraphicsSystem(emulator) {}
+    : GraphicsSystem(emulator), timer_queue_(nullptr), vsync_timer_(nullptr) {}
 
 GL4GraphicsSystem::~GL4GraphicsSystem() = default;
 
 X_STATUS GL4GraphicsSystem::Setup() {
+  auto result = GraphicsSystem::Setup();
+  if (result) {
+    return result;
+  }
+
+  // Create rendering control.
+  // This must happen on the UI thread.
+  poly::threading::Fence control_ready_fence;
   auto loop = emulator_->main_window()->loop();
-  loop->Post([this]() {
-    control_ = std::make_unique<WGLControl>();
+  loop->Post([&]() {
+    control_ = std::make_unique<WGLControl>(loop);
     emulator_->main_window()->AddChild(control_.get());
+    control_ready_fence.Signal();
   });
+  control_ready_fence.Wait();
+
+  // Create command processor. This will spin up a thread to process all
+  // incoming ringbuffer packets.
+  command_processor_ = std::make_unique<CommandProcessor>(this);
+  command_processor_->set_swap_handler(
+      std::bind(&GL4GraphicsSystem::SwapHandler, this));
+
+  // Let the processor know we want register access callbacks.
+  emulator_->memory()->AddMappedRange(
+      0x7FC80000, 0xFFFF0000, 0x0000FFFF, this,
+      reinterpret_cast(MMIOReadRegisterThunk),
+      reinterpret_cast(MMIOWriteRegisterThunk));
+
+  // 60hz vsync timer.
+  timer_queue_ = CreateTimerQueue();
+  CreateTimerQueueTimer(&vsync_timer_, timer_queue_,
+                        (WAITORTIMERCALLBACK)VsyncCallbackThunk, this, 16, 16,
+                        WT_EXECUTEINTIMERTHREAD);
+
   return X_STATUS_SUCCESS;
 }
 
 void GL4GraphicsSystem::Shutdown() {
+  // Stop the vsync timer first. INVALID_HANDLE_VALUE makes the call wait for
+  // a callback that is already running, so MarkVblank() can never touch the
+  // command processor after it has been destroyed below.
+  DeleteTimerQueueTimer(timer_queue_, vsync_timer_, INVALID_HANDLE_VALUE);
+  DeleteTimerQueue(timer_queue_);
+
+  command_processor_->Shutdown();
+
+  // TODO(benvanik): remove mapped range.
+
+  command_processor_.reset();
   control_.reset();
+
+  GraphicsSystem::Shutdown();
+}
+
+// Forwards ring buffer initialization to the command processor.
+void GL4GraphicsSystem::InitializeRingBuffer(uint32_t ptr,
+                                             uint32_t page_count) {
+  command_processor_->Initialize(ptr, page_count);
+}
+
+// Forwards read-pointer write-back setup to the command processor.
+void GL4GraphicsSystem::EnableReadPointerWriteBack(uint32_t ptr,
+                                                   uint32_t block_size) {
+  command_processor_->EnableReadPointerWriteBack(ptr, block_size);
+}
+
+// Called from the vsync timer callback and at the end of SwapHandler().
+void GL4GraphicsSystem::MarkVblank() {
+  static bool thread_name_set = false;
+  if (!thread_name_set) {
+    thread_name_set = true;
+    Profiler::ThreadEnter("GL4 Vsync Timer");
+  }
+  SCOPE_profile_cpu_f("gpu");
+
+  // Increment vblank counter (so the game sees us making progress).
+  command_processor_->increment_counter();
+
+  // TODO(benvanik): we shouldn't need to do the dispatch here, but there's
+  //                 something wrong and the CP will block waiting for code that
+  //                 needs to be run in the interrupt.
+  DispatchInterruptCallback(0, 2);
+}
+
+// Swap callback installed on the command processor in Setup().
+void GL4GraphicsSystem::SwapHandler() {
+  SCOPE_profile_cpu_f("gpu");
+
+  // Swap requested. Synchronously post a request to the loop so that
+  // we do the swap in the right thread.
+  control_->SynchronousRepaint();
+
+  // Roll over vblank.
+  MarkVblank();
+}
+
+// MMIO read handler. The low 16 bits of |addr| select the register; three
+// registers are hard-wired to read as 1, everything else comes from the
+// register file.
+uint64_t GL4GraphicsSystem::ReadRegister(uint64_t addr) {
+  uint32_t r = addr & 0xFFFF;
+  if (FLAGS_trace_ring_buffer) {
+    XELOGGPU("ReadRegister(%.4X)", r);
+  }
+
+  switch (r) {
+    case 0x6530:  // ????
+      return 1;
+    case 0x6544:  // ? vblank pending?
+      return 1;
+    case 0x6584:  // ????
+      return 1;
+  }
+
+  // r is unsigned, so only the upper bound needs checking.
+  assert_true(r < RegisterFile::kRegisterCount);
+  return register_file_.values[r].u32;
+}
+
+// MMIO write handler. The low 16 bits of |addr| select the register. Writes
+// to CP_RB_WPTR are forwarded to the command processor and then stored;
+// writes to 0x6110 are logged and dropped; every other write is logged as
+// unknown and stored in the register file.
+void GL4GraphicsSystem::WriteRegister(uint64_t addr, uint64_t value) {
+  uint32_t r = addr & 0xFFFF;
+  // The register file stores 32-bit values; narrowing once also keeps the
+  // %.8X arguments below correctly typed.
+  const uint32_t value32 = static_cast<uint32_t>(value);
+  if (FLAGS_trace_ring_buffer) {
+    XELOGGPU("WriteRegister(%.4X, %.8X)", r, value32);
+  }
+
+  switch (r) {
+    case 0x0714:  // CP_RB_WPTR
+      command_processor_->UpdateWritePointer(value32);
+      break;
+    case 0x6110:  // ? swap related?
+      XELOGW("Unimplemented GPU register %.4X write: %.8X", r, value32);
+      return;
+    default:
+      XELOGW("Unknown GPU register %.4X write: %.8X", r, value32);
+      break;
+  }
+
+  // r is unsigned, so only the upper bound needs checking.
+  assert_true(r < RegisterFile::kRegisterCount);
+  register_file_.values[r].u32 = value32;
 }
 
 }  // namespace gl4
diff --git a/src/xenia/gpu/gl4/gl4_graphics_system.h b/src/xenia/gpu/gl4/gl4_graphics_system.h
index a374c79d4..982048f24 100644
--- a/src/xenia/gpu/gl4/gl4_graphics_system.h
+++ b/src/xenia/gpu/gl4/gl4_graphics_system.h
@@ -13,8 +13,10 @@
 
 #include
 #include
+#include
 #include
 #include
+#include
 
 namespace xe {
 namespace gpu {
@@ -28,8 +30,34 @@ class GL4GraphicsSystem : public GraphicsSystem {
   X_STATUS Setup() override;
   void Shutdown() override;
 
+  RegisterFile* register_file() { return &register_file_; }
+
+  void InitializeRingBuffer(uint32_t ptr, uint32_t page_count) override;
+  void EnableReadPointerWriteBack(uint32_t ptr, uint32_t block_size) override;
+
  private:
+  void MarkVblank();
+  void SwapHandler();
+  uint64_t ReadRegister(uint64_t addr);
+  void WriteRegister(uint64_t addr, uint64_t value);
+
+  static uint64_t MMIOReadRegisterThunk(GL4GraphicsSystem* gs, uint64_t addr) {
+    return gs->ReadRegister(addr);
+  }
+  static void MMIOWriteRegisterThunk(GL4GraphicsSystem* gs, uint64_t addr,
+                                     uint64_t value) {
+    gs->WriteRegister(addr, value);
+  }
+  static void __stdcall VsyncCallbackThunk(GL4GraphicsSystem* gs, BOOLEAN) {
+    gs->MarkVblank();
+  }
+
+  RegisterFile register_file_;
+  std::unique_ptr<CommandProcessor> command_processor_;
std::unique_ptr<WGLControl> control_;
+
+  HANDLE timer_queue_;
+  HANDLE vsync_timer_;
 };
 
 }  // namespace gl4
diff --git a/src/xenia/gpu/gl4/sources.gypi b/src/xenia/gpu/gl4/sources.gypi
index 633611ac8..e6b5bd3ea 100644
--- a/src/xenia/gpu/gl4/sources.gypi
+++ b/src/xenia/gpu/gl4/sources.gypi
@@ -1,6 +1,8 @@
 # Copyright 2014 Ben Vanik. All Rights Reserved.
 {
   'sources': [
+    'command_processor.cc',
+    'command_processor.h',
     'gl4_gpu-private.h',
     'gl4_gpu.cc',
     'gl4_gpu.h',
diff --git a/src/xenia/gpu/gl4/wgl_control.cc b/src/xenia/gpu/gl4/wgl_control.cc
index 2b895cc0c..5040fcd3a 100644
--- a/src/xenia/gpu/gl4/wgl_control.cc
+++ b/src/xenia/gpu/gl4/wgl_control.cc
@@ -9,14 +9,17 @@
 
 #include
 
+#include
 #include
+#include
 
 namespace xe {
 namespace gpu {
 namespace gl4 {
 
-WGLControl::WGLControl()
-    : poly::ui::win32::Win32Control(Flags::kFlagOwnPaint) {}
+WGLControl::WGLControl(poly::ui::Loop* loop)
+    : poly::ui::win32::Win32Control(Flags::kFlagOwnPaint),
+      loop_(loop) {}
 
 WGLControl::~WGLControl() = default;
 
@@ -68,24 +71,31 @@ bool WGLControl::Create() {
   return true;
 }
 
-void WGLControl::OnLayout(poly::ui::UIEvent& e) {
-  Control::ResizeToFill();
-}
+void WGLControl::OnLayout(poly::ui::UIEvent& e) { Control::ResizeToFill(); }
 
 LRESULT WGLControl::WndProc(HWND hWnd, UINT message, WPARAM wParam,
                             LPARAM lParam) {
   switch (message) {
-  case WM_PAINT:
-    context_.MakeCurrent();
-    glViewport(0, 0, width_, height_);
-    glClearColor(1.0f, 0, 0, 1.0f);
-    glClear(GL_COLOR_BUFFER_BIT);
-    SwapBuffers(context_.dc());
-    return 0;
+    case WM_PAINT:
+      context_.MakeCurrent();
+      glViewport(0, 0, width_, height_);
+      glClearColor(rand() / static_cast<float>(RAND_MAX), 1.0f, 0, 1.0f);
+      glClear(GL_COLOR_BUFFER_BIT);
+      // TODO(benvanik): profiler present.
+      // Profiler::Present();
+      SwapBuffers(context_.dc());
+      break;  // Falls through to Win32Control::WndProc below.
   }
   return Win32Control::WndProc(hWnd, message, wParam, lParam);
 }
 
+void WGLControl::SynchronousRepaint() {
+  SCOPE_profile_cpu_f("gpu");
+  // This will not return until the WM_PAINT has completed.
+  RedrawWindow(hwnd(), nullptr, nullptr,
+               RDW_INTERNALPAINT | RDW_UPDATENOW | RDW_ALLCHILDREN);
+}
+
 }  // namespace gl4
 }  // namespace gpu
 }  // namespace xe
diff --git a/src/xenia/gpu/gl4/wgl_control.h b/src/xenia/gpu/gl4/wgl_control.h
index dd68b14ed..34f7453e9 100644
--- a/src/xenia/gpu/gl4/wgl_control.h
+++ b/src/xenia/gpu/gl4/wgl_control.h
@@ -10,6 +10,8 @@
 #ifndef XENIA_GPU_GL4_WGL_CONTROL_H_
 #define XENIA_GPU_GL4_WGL_CONTROL_H_
 
+#include
+#include
 #include
 #include
 
@@ -19,11 +21,13 @@ namespace gl4 {
 
 class WGLControl : public poly::ui::win32::Win32Control {
  public:
-  WGLControl();
+  explicit WGLControl(poly::ui::Loop* loop);
   ~WGLControl() override;
 
   GLContext* context() { return &context_; }
 
+  void SynchronousRepaint();  // Returns once the WM_PAINT has completed.
+
  protected:
   bool Create() override;
 
@@ -33,6 +37,7 @@ class WGLControl : public poly::ui::win32::Win32Control {
                   LPARAM lParam) override;
 
  private:
+  poly::ui::Loop* loop_;
   GLContext context_;
 };
 
diff --git a/src/xenia/gpu/gpu.cc b/src/xenia/gpu/gpu.cc
index 66a1688de..92d0a9b94 100644
--- a/src/xenia/gpu/gpu.cc
+++ b/src/xenia/gpu/gpu.cc
@@ -13,15 +13,15 @@
 // TODO(benvanik): based on platform.
 #include
 
-namespace xe {
-namespace gpu {
-
 DEFINE_string(gpu, "any", "Graphics system.
Use: [any, gl4]"); DEFINE_bool(trace_ring_buffer, false, "Trace GPU ring buffer packets."); DEFINE_string(dump_shaders, "", "Path to write GPU shaders to as they are compiled."); +namespace xe { +namespace gpu { + std::unique_ptr Create(Emulator* emulator) { if (FLAGS_gpu.compare("gl4") == 0) { return xe::gpu::gl4::Create(emulator); diff --git a/src/xenia/gpu/graphics_system.cc b/src/xenia/gpu/graphics_system.cc index e416a3e0d..f5476e518 100644 --- a/src/xenia/gpu/graphics_system.cc +++ b/src/xenia/gpu/graphics_system.cc @@ -40,15 +40,6 @@ void GraphicsSystem::SetInterruptCallback(uint32_t callback, XELOGGPU("SetInterruptCallback(%.4X, %.4X)", callback, user_data); } -void GraphicsSystem::InitializeRingBuffer(uint32_t ptr, uint32_t page_count) { - // -} - -void GraphicsSystem::EnableReadPointerWriteBack(uint32_t ptr, - uint32_t block_size) { - // -} - void GraphicsSystem::DispatchInterruptCallback(uint32_t source, uint32_t cpu) { // Pick a CPU, if needed. We're going to guess 2. Because. if (cpu == 0xFFFFFFFF) { diff --git a/src/xenia/gpu/graphics_system.h b/src/xenia/gpu/graphics_system.h index 7839d0180..024979110 100644 --- a/src/xenia/gpu/graphics_system.h +++ b/src/xenia/gpu/graphics_system.h @@ -32,14 +32,14 @@ class GraphicsSystem { virtual void Shutdown(); void SetInterruptCallback(uint32_t callback, uint32_t user_data); - void InitializeRingBuffer(uint32_t ptr, uint32_t page_count); - void EnableReadPointerWriteBack(uint32_t ptr, uint32_t block_size); + virtual void InitializeRingBuffer(uint32_t ptr, uint32_t page_count) = 0; + virtual void EnableReadPointerWriteBack(uint32_t ptr, uint32_t block_size) = 0; + + void DispatchInterruptCallback(uint32_t source, uint32_t cpu); protected: GraphicsSystem(Emulator* emulator); - void DispatchInterruptCallback(uint32_t source, uint32_t cpu); - Emulator* emulator_; Memory* memory_; cpu::Processor* processor_;