diff --git a/src/poly/mapped_memory.h b/src/poly/mapped_memory.h index 0d1ffffd2..9db211e4f 100644 --- a/src/poly/mapped_memory.h +++ b/src/poly/mapped_memory.h @@ -18,8 +18,8 @@ namespace poly { class MappedMemory { public: enum class Mode { - READ, - READ_WRITE, + kRead, + kReadWrite, }; virtual ~MappedMemory() = default; diff --git a/src/poly/mapped_memory_win.cc b/src/poly/mapped_memory_win.cc index 98162567c..83320fd12 100644 --- a/src/poly/mapped_memory_win.cc +++ b/src/poly/mapped_memory_win.cc @@ -45,14 +45,14 @@ std::unique_ptr MappedMemory::Open(const std::wstring& path, DWORD mapping_protect = 0; DWORD view_access = 0; switch (mode) { - case Mode::READ: + case Mode::kRead: file_access |= GENERIC_READ; file_share |= FILE_SHARE_READ; create_mode |= OPEN_EXISTING; mapping_protect |= PAGE_READONLY; view_access |= FILE_MAP_READ; break; - case Mode::READ_WRITE: + case Mode::kReadWrite: file_access |= GENERIC_READ | GENERIC_WRITE; file_share |= 0; create_mode |= OPEN_EXISTING; diff --git a/src/xenia/emulator.cc b/src/xenia/emulator.cc index df4960e39..f7011c15b 100644 --- a/src/xenia/emulator.cc +++ b/src/xenia/emulator.cc @@ -102,7 +102,7 @@ X_STATUS Emulator::Setup() { } // Initialize the GPU. - graphics_system_ = std::move(xe::gpu::Create(this)); + graphics_system_ = std::move(xe::gpu::Create()); if (!graphics_system_) { return X_STATUS_NOT_IMPLEMENTED; } @@ -122,7 +122,8 @@ X_STATUS Emulator::Setup() { if (result) { return result; } - result = graphics_system_->Setup(); + result = graphics_system_->Setup(processor_.get(), main_window_->loop(), + main_window_.get()); if (result) { return result; } diff --git a/src/xenia/gpu/gl4/command_processor.cc b/src/xenia/gpu/gl4/command_processor.cc index 4eebfc138..47b9e83e6 100644 --- a/src/xenia/gpu/gl4/command_processor.cc +++ b/src/xenia/gpu/gl4/command_processor.cc @@ -22,9 +22,6 @@ #include "third_party/xxhash/xxhash.h" -#define XETRACECP(fmt, ...) 
\ - if (FLAGS_trace_ring_buffer) XELOGGPU(fmt, ##__VA_ARGS__) - #define FINE_GRAINED_DRAW_SCOPES 1 namespace xe { @@ -56,6 +53,7 @@ CommandProcessor::CommandProcessor(GL4GraphicsSystem* graphics_system) membase_(graphics_system->memory()->membase()), graphics_system_(graphics_system), register_file_(graphics_system_->register_file()), + trace_writer_(graphics_system->memory()->membase()), worker_running_(true), time_base_(0), counter_(0), @@ -94,6 +92,8 @@ uint64_t CommandProcessor::QueryTime() { bool CommandProcessor::Initialize(std::unique_ptr context) { context_ = std::move(context); + pending_fn_event_ = CreateEvent(nullptr, TRUE, FALSE, nullptr); + worker_running_ = true; worker_thread_ = std::thread([this]() { poly::threading::set_name("GL4 Worker"); @@ -106,6 +106,8 @@ bool CommandProcessor::Initialize(std::unique_ptr context) { } void CommandProcessor::Shutdown() { + EndTracing(); + worker_running_ = false; SetEvent(write_ptr_index_event_); worker_thread_.join(); @@ -115,6 +117,22 @@ void CommandProcessor::Shutdown() { shader_cache_.clear(); context_.reset(); + + CloseHandle(pending_fn_event_); +} + +void CommandProcessor::BeginTracing(const std::wstring& root_path) { + std::wstring path = poly::join_paths(root_path, L"gpu_trace"); + trace_writer_.Open(path); +} + +void CommandProcessor::EndTracing() { trace_writer_.Close(); } + +void CommandProcessor::CallInThread(std::function fn) { + assert_null(pending_fn_); + pending_fn_ = std::move(fn); + WaitForSingleObject(pending_fn_event_, INFINITE); + ResetEvent(pending_fn_event_); } void CommandProcessor::WorkerMain() { @@ -125,6 +143,13 @@ void CommandProcessor::WorkerMain() { } while (worker_running_) { + if (pending_fn_) { + auto fn = std::move(pending_fn_); + pending_fn_ = nullptr; + fn(); + SetEvent(pending_fn_event_); + } + uint32_t write_ptr_index = write_ptr_index_.load(); if (write_ptr_index == 0xBAADF00D || read_ptr_index_ == write_ptr_index) { SCOPE_profile_cpu_i("gpu", 
"xe::gpu::gl4::CommandProcessor::Stall"); @@ -140,15 +165,15 @@ void CommandProcessor::WorkerMain() { SwitchToThread(); MemoryBarrier(); write_ptr_index = write_ptr_index_.load(); - } while (write_ptr_index == 0xBAADF00D || - read_ptr_index_ == write_ptr_index); + } while (!pending_fn_ && (write_ptr_index == 0xBAADF00D || + read_ptr_index_ == write_ptr_index)); // ReturnFromWait(); + if (pending_fn_) { + continue; + } } assert_true(read_ptr_index_ != write_ptr_index); - // Process the new commands. - XETRACECP("Command processor thread work"); - // Execute. Note that we handle wraparound transparently. ExecutePrimaryBuffer(read_ptr_index_, write_ptr_index); read_ptr_index_ = write_ptr_index; @@ -378,8 +403,7 @@ void CommandProcessor::UpdateWritePointer(uint32_t value) { SetEvent(write_ptr_index_event_); } -void CommandProcessor::WriteRegister(uint32_t packet_ptr, uint32_t index, - uint32_t value) { +void CommandProcessor::WriteRegister(uint32_t index, uint32_t value) { RegisterFile* regs = register_file_; assert_true(index < RegisterFile::kRegisterCount); regs->values[index].u32 = value; @@ -398,8 +422,8 @@ void CommandProcessor::WriteRegister(uint32_t packet_ptr, uint32_t index, // Enabled - write to address. uint32_t scratch_addr = regs->values[XE_GPU_REG_SCRATCH_ADDR].u32; uint32_t mem_addr = scratch_addr + (scratch_reg * 4); - poly::store_and_swap( - membase_ + xenos::GpuToCpu(primary_buffer_ptr_, mem_addr), value); + poly::store_and_swap(membase_ + xenos::GpuToCpu(mem_addr), + value); } } } @@ -426,8 +450,8 @@ void CommandProcessor::MakeCoherent() { } // TODO(benvanik): notify resource cache of base->size and type. - XETRACECP("Make %.8X -> %.8X (%db) coherent", base_host, - base_host + size_host, size_host); + // XELOGD("Make %.8X -> %.8X (%db) coherent", base_host, base_host + + // size_host, size_host); // Mark coherent. 
status_host &= ~0x80000000ul; @@ -437,6 +461,8 @@ void CommandProcessor::MakeCoherent() { void CommandProcessor::PrepareForWait() { SCOPE_profile_cpu_f("gpu"); + trace_writer_.Flush(); + // TODO(benvanik): fences and fancy stuff. We should figure out a way to // make interrupt callbacks from the GPU so that we don't have to do a full // synchronize here. @@ -494,14 +520,6 @@ class CommandProcessor::RingbufferReader { void Skip(uint32_t words) { Advance(words); } - void TraceData(uint32_t words) { - for (uint32_t i = 0; i < words; ++i) { - uint32_t i_ptr = ptr_ + i * sizeof(uint32_t); - XETRACECP("[%.8X] %.8X", i_ptr, - poly::load_and_swap(membase_ + i_ptr)); - } - } - private: uint8_t* membase_; @@ -523,8 +541,7 @@ void CommandProcessor::ExecutePrimaryBuffer(uint32_t start_index, uint32_t end_ptr = primary_buffer_ptr_ + end_index * sizeof(uint32_t); end_ptr = (primary_buffer_ptr_ & ~0x1FFFFFFF) | (end_ptr & 0x1FFFFFFF); - XETRACECP("[%.8X] ExecutePrimaryBuffer(%dw -> %dw)", start_ptr, start_index, - end_index); + trace_writer_.WritePrimaryBufferStart(start_ptr, end_index - start_index); // Execute commands! uint32_t ptr_mask = (primary_buffer_size_ / sizeof(uint32_t)) - 1; @@ -537,13 +554,13 @@ void CommandProcessor::ExecutePrimaryBuffer(uint32_t start_index, assert_true(reader.offset() == (end_index - start_index)); } - XETRACECP(" ExecutePrimaryBuffer End"); + trace_writer_.WritePrimaryBufferEnd(); } void CommandProcessor::ExecuteIndirectBuffer(uint32_t ptr, uint32_t length) { SCOPE_profile_cpu_f("gpu"); - XETRACECP("[%.8X] ExecuteIndirectBuffer(%dw)", ptr, length); + trace_writer_.WriteIndirectBufferStart(ptr, length / sizeof(uint32_t)); // Execute commands! 
uint32_t ptr_mask = 0; @@ -553,29 +570,38 @@ void CommandProcessor::ExecuteIndirectBuffer(uint32_t ptr, uint32_t length) { ExecutePacket(&reader); } - XETRACECP(" ExecuteIndirectBuffer End"); + trace_writer_.WriteIndirectBufferEnd(); +} + +void CommandProcessor::ExecutePacket(uint32_t ptr, uint32_t count) { + uint32_t ptr_mask = 0; + RingbufferReader reader(membase_, primary_buffer_ptr_, ptr_mask, ptr, + ptr + count * sizeof(uint32_t)); + while (reader.can_read()) { + ExecutePacket(&reader); + } } bool CommandProcessor::ExecutePacket(RingbufferReader* reader) { RegisterFile* regs = register_file_; - uint32_t packet_ptr = reader->ptr(); const uint32_t packet = reader->Read(); const uint32_t packet_type = packet >> 30; if (packet == 0) { - XETRACECP("[%.8X] Packet(%.8X): 0?", packet_ptr, packet); + trace_writer_.WritePacketStart(reader->ptr() - 4, 1); + trace_writer_.WritePacketEnd(); return true; } switch (packet_type) { case 0x00: - return ExecutePacketType0(reader, packet_ptr, packet); + return ExecutePacketType0(reader, packet); case 0x01: - return ExecutePacketType1(reader, packet_ptr, packet); + return ExecutePacketType1(reader, packet); case 0x02: - return ExecutePacketType2(reader, packet_ptr, packet); + return ExecutePacketType2(reader, packet); case 0x03: - return ExecutePacketType3(reader, packet_ptr, packet); + return ExecutePacketType3(reader, packet); default: assert_unhandled_case(packet_type); return false; @@ -583,75 +609,66 @@ bool CommandProcessor::ExecutePacket(RingbufferReader* reader) { } bool CommandProcessor::ExecutePacketType0(RingbufferReader* reader, - uint32_t packet_ptr, uint32_t packet) { // Type-0 packet. // Write count registers in sequence to the registers starting at // (base_index << 2). 
- XETRACECP("[%.8X] Packet(%.8X): set registers:", packet_ptr, packet); + uint32_t count = ((packet >> 16) & 0x3FFF) + 1; + trace_writer_.WritePacketStart(reader->ptr() - 4, 1 + count); + uint32_t base_index = (packet & 0x7FFF); uint32_t write_one_reg = (packet >> 15) & 0x1; for (uint32_t m = 0; m < count; m++) { - uint32_t reg_data = reader->Peek(); + uint32_t reg_data = reader->Read(); uint32_t target_index = write_one_reg ? base_index : base_index + m; - const char* reg_name = register_file_->GetRegisterName(target_index); - XETRACECP("[%.8X] %.8X -> %.4X %s", reader->ptr(), reg_data, target_index, - reg_name ? reg_name : ""); - reader->Advance(1); - WriteRegister(packet_ptr, target_index, reg_data); + WriteRegister(target_index, reg_data); } + + trace_writer_.WritePacketEnd(); return true; } bool CommandProcessor::ExecutePacketType1(RingbufferReader* reader, - uint32_t packet_ptr, uint32_t packet) { // Type-1 packet. // Contains two registers of data. Type-0 should be more common. - XETRACECP("[%.8X] Packet(%.8X): set registers:", packet_ptr, packet); + trace_writer_.WritePacketStart(reader->ptr() - 4, 3); uint32_t reg_index_1 = packet & 0x7FF; uint32_t reg_index_2 = (packet >> 11) & 0x7FF; - uint32_t reg_ptr_1 = reader->ptr(); uint32_t reg_data_1 = reader->Read(); - uint32_t reg_ptr_2 = reader->ptr(); uint32_t reg_data_2 = reader->Read(); - const char* reg_name_1 = register_file_->GetRegisterName(reg_index_1); - const char* reg_name_2 = register_file_->GetRegisterName(reg_index_2); - XETRACECP("[%.8X] %.8X -> %.4X %s", reg_ptr_1, reg_data_1, reg_index_1, - reg_name_1 ? reg_name_1 : ""); - XETRACECP("[%.8X] %.8X -> %.4X %s", reg_ptr_2, reg_data_2, reg_index_2, - reg_name_2 ? 
reg_name_2 : ""); - WriteRegister(packet_ptr, reg_index_1, reg_data_1); - WriteRegister(packet_ptr, reg_index_2, reg_data_2); + WriteRegister(reg_index_1, reg_data_1); + WriteRegister(reg_index_2, reg_data_2); + trace_writer_.WritePacketEnd(); return true; } bool CommandProcessor::ExecutePacketType2(RingbufferReader* reader, - uint32_t packet_ptr, uint32_t packet) { // Type-2 packet. // No-op. Do nothing. - XETRACECP("[%.8X] Packet(%.8X): padding", packet_ptr, packet); + trace_writer_.WritePacketStart(reader->ptr() - 4, 1); + trace_writer_.WritePacketEnd(); return true; } bool CommandProcessor::ExecutePacketType3(RingbufferReader* reader, - uint32_t packet_ptr, uint32_t packet) { // Type-3 packet. uint32_t opcode = (packet >> 8) & 0x7F; uint32_t count = ((packet >> 16) & 0x3FFF) + 1; auto data_start_offset = reader->offset(); + trace_writer_.WritePacketStart(reader->ptr() - 4, 1 + count); + // & 1 == predicate - when set, we do bin check to see if we should execute // the packet. Only type 3 packets are affected. 
if (packet & 1) { bool any_pass = (bin_select_ & bin_mask_) != 0; if (!any_pass) { - XETRACECP("[%.8X] Packet(%.8X): SKIPPED (predicate fail)", packet_ptr, - packet); reader->Skip(count); + trace_writer_.WritePacketEnd(); return true; } } @@ -659,96 +676,78 @@ bool CommandProcessor::ExecutePacketType3(RingbufferReader* reader, bool result = false; switch (opcode) { case PM4_ME_INIT: - result = ExecutePacketType3_ME_INIT(reader, packet_ptr, packet, count); + result = ExecutePacketType3_ME_INIT(reader, packet, count); break; case PM4_NOP: - result = ExecutePacketType3_NOP(reader, packet_ptr, packet, count); + result = ExecutePacketType3_NOP(reader, packet, count); break; case PM4_INTERRUPT: - result = ExecutePacketType3_INTERRUPT(reader, packet_ptr, packet, count); + result = ExecutePacketType3_INTERRUPT(reader, packet, count); break; case PM4_XE_SWAP: - result = ExecutePacketType3_XE_SWAP(reader, packet_ptr, packet, count); + result = ExecutePacketType3_XE_SWAP(reader, packet, count); break; case PM4_INDIRECT_BUFFER: - result = - ExecutePacketType3_INDIRECT_BUFFER(reader, packet_ptr, packet, count); + result = ExecutePacketType3_INDIRECT_BUFFER(reader, packet, count); break; case PM4_WAIT_REG_MEM: - result = - ExecutePacketType3_WAIT_REG_MEM(reader, packet_ptr, packet, count); + result = ExecutePacketType3_WAIT_REG_MEM(reader, packet, count); break; case PM4_REG_RMW: - result = ExecutePacketType3_REG_RMW(reader, packet_ptr, packet, count); + result = ExecutePacketType3_REG_RMW(reader, packet, count); break; case PM4_COND_WRITE: - result = ExecutePacketType3_COND_WRITE(reader, packet_ptr, packet, count); + result = ExecutePacketType3_COND_WRITE(reader, packet, count); break; case PM4_EVENT_WRITE: - result = - ExecutePacketType3_EVENT_WRITE(reader, packet_ptr, packet, count); + result = ExecutePacketType3_EVENT_WRITE(reader, packet, count); break; case PM4_EVENT_WRITE_SHD: - result = - ExecutePacketType3_EVENT_WRITE_SHD(reader, packet_ptr, packet, count); + result = 
ExecutePacketType3_EVENT_WRITE_SHD(reader, packet, count); break; case PM4_EVENT_WRITE_EXT: - result = - ExecutePacketType3_EVENT_WRITE_EXT(reader, packet_ptr, packet, count); + result = ExecutePacketType3_EVENT_WRITE_EXT(reader, packet, count); break; case PM4_DRAW_INDX: - result = ExecutePacketType3_DRAW_INDX(reader, packet_ptr, packet, count); + result = ExecutePacketType3_DRAW_INDX(reader, packet, count); break; case PM4_DRAW_INDX_2: - result = - ExecutePacketType3_DRAW_INDX_2(reader, packet_ptr, packet, count); + result = ExecutePacketType3_DRAW_INDX_2(reader, packet, count); break; case PM4_SET_CONSTANT: - result = - ExecutePacketType3_SET_CONSTANT(reader, packet_ptr, packet, count); + result = ExecutePacketType3_SET_CONSTANT(reader, packet, count); break; case PM4_LOAD_ALU_CONSTANT: - result = ExecutePacketType3_LOAD_ALU_CONSTANT(reader, packet_ptr, packet, - count); + result = ExecutePacketType3_LOAD_ALU_CONSTANT(reader, packet, count); break; case PM4_IM_LOAD: - result = ExecutePacketType3_IM_LOAD(reader, packet_ptr, packet, count); + result = ExecutePacketType3_IM_LOAD(reader, packet, count); break; case PM4_IM_LOAD_IMMEDIATE: - result = ExecutePacketType3_IM_LOAD_IMMEDIATE(reader, packet_ptr, packet, - count); + result = ExecutePacketType3_IM_LOAD_IMMEDIATE(reader, packet, count); break; case PM4_INVALIDATE_STATE: - result = ExecutePacketType3_INVALIDATE_STATE(reader, packet_ptr, packet, - count); + result = ExecutePacketType3_INVALIDATE_STATE(reader, packet, count); break; case PM4_SET_BIN_MASK_LO: { uint32_t value = reader->Read(); - XETRACECP("[%.8X] Packet(%.8X): PM4_SET_BIN_MASK_LO = %.8X", packet_ptr, - packet, value); bin_mask_ = (bin_mask_ & 0xFFFFFFFF00000000ull) | value; result = true; } break; case PM4_SET_BIN_MASK_HI: { uint32_t value = reader->Read(); - XETRACECP("[%.8X] Packet(%.8X): PM4_SET_BIN_MASK_HI = %.8X", packet_ptr, - packet, value); bin_mask_ = (bin_mask_ & 0xFFFFFFFFull) | (static_cast(value) << 32); result = true; } break; case 
PM4_SET_BIN_SELECT_LO: { uint32_t value = reader->Read(); - XETRACECP("[%.8X] Packet(%.8X): PM4_SET_BIN_SELECT_LO = %.8X", packet_ptr, - packet, value); bin_select_ = (bin_select_ & 0xFFFFFFFF00000000ull) | value; result = true; } break; case PM4_SET_BIN_SELECT_HI: { uint32_t value = reader->Read(); - XETRACECP("[%.8X] Packet(%.8X): PM4_SET_BIN_SELECT_HI = %.8X", packet_ptr, - packet, value); bin_select_ = (bin_select_ & 0xFFFFFFFFull) | (static_cast(value) << 32); result = true; @@ -757,53 +756,44 @@ bool CommandProcessor::ExecutePacketType3(RingbufferReader* reader, // Ignored packets - useful if breaking on the default handler below. case 0x50: // 0xC0015000 usually 2 words, 0xFFFFFFFF / 0x00000000 case 0x51: // 0xC0015100 usually 2 words, 0xFFFFFFFF / 0xFFFFFFFF - XETRACECP("[%.8X] Packet(%.8X): unknown!", packet_ptr, packet); - reader->TraceData(count); reader->Skip(count); break; default: - XETRACECP("[%.8X] Packet(%.8X): unknown!", packet_ptr, packet); - reader->TraceData(count); reader->Skip(count); break; } + trace_writer_.WritePacketEnd(); assert_true(reader->offset() == data_start_offset + count); return result; } bool CommandProcessor::ExecutePacketType3_ME_INIT(RingbufferReader* reader, - uint32_t packet_ptr, + uint32_t packet, uint32_t count) { // initialize CP's micro-engine - XETRACECP("[%.8X] Packet(%.8X): PM4_ME_INIT", packet_ptr, packet); - reader->TraceData(count); reader->Advance(count); return true; } bool CommandProcessor::ExecutePacketType3_NOP(RingbufferReader* reader, - uint32_t packet_ptr, + uint32_t packet, uint32_t count) { // skip N 32-bit words to get to the next packet // No-op, ignore some data. 
- XETRACECP("[%.8X] Packet(%.8X): PM4_NOP", packet_ptr, packet); - reader->TraceData(count); reader->Advance(count); return true; } bool CommandProcessor::ExecutePacketType3_INTERRUPT(RingbufferReader* reader, - uint32_t packet_ptr, + uint32_t packet, uint32_t count) { SCOPE_profile_cpu_f("gpu"); // generate interrupt from the command stream - XETRACECP("[%.8X] Packet(%.8X): PM4_INTERRUPT", packet_ptr, packet); - reader->TraceData(count); uint32_t cpu_mask = reader->Read(); for (int n = 0; n < 6; n++) { if (cpu_mask & (1 << n)) { @@ -814,7 +804,7 @@ bool CommandProcessor::ExecutePacketType3_INTERRUPT(RingbufferReader* reader, } bool CommandProcessor::ExecutePacketType3_XE_SWAP(RingbufferReader* reader, - uint32_t packet_ptr, + uint32_t packet, uint32_t count) { SCOPE_profile_cpu_f("gpu"); @@ -826,9 +816,7 @@ bool CommandProcessor::ExecutePacketType3_XE_SWAP(RingbufferReader* reader, // Xenia-specific VdSwap hook. // VdSwap will post this to tell us we need to swap the screen/fire an // interrupt. - XETRACECP("[%.8X] Packet(%.8X): PM4_XE_SWAP", packet_ptr, packet); // 63 words here, but only the first has any data. - reader->TraceData(1); uint32_t frontbuffer_ptr = reader->Read(); reader->Advance(count - 1); @@ -868,30 +856,28 @@ bool CommandProcessor::ExecutePacketType3_XE_SWAP(RingbufferReader* reader, // Remove any dead textures, etc. 
texture_cache_.Scavenge(); } + + trace_writer_.WriteEvent(EventType::kSwap); + trace_writer_.Flush(); return true; } bool CommandProcessor::ExecutePacketType3_INDIRECT_BUFFER( - RingbufferReader* reader, uint32_t packet_ptr, uint32_t packet, - uint32_t count) { + RingbufferReader* reader, uint32_t packet, uint32_t count) { // indirect buffer dispatch uint32_t list_ptr = reader->Read(); uint32_t list_length = reader->Read(); - XETRACECP("[%.8X] Packet(%.8X): PM4_INDIRECT_BUFFER %.8X (%dw)", packet_ptr, - packet, list_ptr, list_length); ExecuteIndirectBuffer(GpuToCpu(list_ptr), list_length); return true; } bool CommandProcessor::ExecutePacketType3_WAIT_REG_MEM(RingbufferReader* reader, - uint32_t packet_ptr, + uint32_t packet, uint32_t count) { SCOPE_profile_cpu_f("gpu"); // wait until a register or memory location is a specific value - XETRACECP("[%.8X] Packet(%.8X): PM4_WAIT_REG_MEM", packet_ptr, packet); - reader->TraceData(count); uint32_t wait_info = reader->Read(); uint32_t poll_reg_addr = reader->Read(); uint32_t ref = reader->Read(); @@ -904,9 +890,9 @@ bool CommandProcessor::ExecutePacketType3_WAIT_REG_MEM(RingbufferReader* reader, // Memory. auto endianness = static_cast(poll_reg_addr & 0x3); poll_reg_addr &= ~0x3; - value = - poly::load(membase_ + GpuToCpu(packet_ptr, poll_reg_addr)); + value = poly::load(membase_ + GpuToCpu(poll_reg_addr)); value = GpuSwap(value, endianness); + trace_writer_.WriteMemoryRead(poll_reg_addr, 4); } else { // Register. assert_true(poll_reg_addr < RegisterFile::kRegisterCount); @@ -963,13 +949,11 @@ bool CommandProcessor::ExecutePacketType3_WAIT_REG_MEM(RingbufferReader* reader, } bool CommandProcessor::ExecutePacketType3_REG_RMW(RingbufferReader* reader, - uint32_t packet_ptr, + uint32_t packet, uint32_t count) { // register read/modify/write // ? 
(used during shader upload and edram setup) - XETRACECP("[%.8X] Packet(%.8X): PM4_REG_RMW", packet_ptr, packet); - reader->TraceData(count); uint32_t rmw_info = reader->Read(); uint32_t and_mask = reader->Read(); uint32_t or_mask = reader->Read(); @@ -988,17 +972,15 @@ bool CommandProcessor::ExecutePacketType3_REG_RMW(RingbufferReader* reader, // & imm value &= and_mask; } - WriteRegister(packet_ptr, rmw_info & 0x1FFF, value); + WriteRegister(rmw_info & 0x1FFF, value); return true; } bool CommandProcessor::ExecutePacketType3_COND_WRITE(RingbufferReader* reader, - uint32_t packet_ptr, + uint32_t packet, uint32_t count) { // conditional write to memory or register - XETRACECP("[%.8X] Packet(%.8X): PM4_COND_WRITE", packet_ptr, packet); - reader->TraceData(count); uint32_t wait_info = reader->Read(); uint32_t poll_reg_addr = reader->Read(); uint32_t ref = reader->Read(); @@ -1010,8 +992,8 @@ bool CommandProcessor::ExecutePacketType3_COND_WRITE(RingbufferReader* reader, // Memory. auto endianness = static_cast(poll_reg_addr & 0x3); poll_reg_addr &= ~0x3; - value = - poly::load(membase_ + GpuToCpu(packet_ptr, poll_reg_addr)); + trace_writer_.WriteMemoryRead(poll_reg_addr, 4); + value = poly::load(membase_ + GpuToCpu(poll_reg_addr)); value = GpuSwap(value, endianness); } else { // Register. @@ -1052,23 +1034,21 @@ bool CommandProcessor::ExecutePacketType3_COND_WRITE(RingbufferReader* reader, auto endianness = static_cast(write_reg_addr & 0x3); write_reg_addr &= ~0x3; write_data = GpuSwap(write_data, endianness); - poly::store(membase_ + GpuToCpu(packet_ptr, write_reg_addr), write_data); + poly::store(membase_ + GpuToCpu(write_reg_addr), write_data); + trace_writer_.WriteMemoryWrite(write_reg_addr, 4); } else { // Register. 
- WriteRegister(packet_ptr, write_reg_addr, write_data); + WriteRegister(write_reg_addr, write_data); } } return true; } bool CommandProcessor::ExecutePacketType3_EVENT_WRITE(RingbufferReader* reader, - uint32_t packet_ptr, + uint32_t packet, uint32_t count) { // generate an event that creates a write to memory when completed - XETRACECP("[%.8X] Packet(%.8X): PM4_EVENT_WRITE (unimplemented!)", packet_ptr, - packet); - reader->TraceData(count); uint32_t initiator = reader->Read(); if (count == 1) { // Just an event flag? Where does this write? @@ -1081,16 +1061,13 @@ bool CommandProcessor::ExecutePacketType3_EVENT_WRITE(RingbufferReader* reader, } bool CommandProcessor::ExecutePacketType3_EVENT_WRITE_SHD( - RingbufferReader* reader, uint32_t packet_ptr, uint32_t packet, - uint32_t count) { + RingbufferReader* reader, uint32_t packet, uint32_t count) { // generate a VS|PS_done event - XETRACECP("[%.8X] Packet(%.8X): PM4_EVENT_WRITE_SHD", packet_ptr, packet); - reader->TraceData(count); uint32_t initiator = reader->Read(); uint32_t address = reader->Read(); uint32_t value = reader->Read(); // Writeback initiator. - WriteRegister(packet_ptr, XE_GPU_REG_VGT_EVENT_INITIATOR, initiator & 0x3F); + WriteRegister(XE_GPU_REG_VGT_EVENT_INITIATOR, initiator & 0x3F); uint32_t data_value; if ((initiator >> 31) & 0x1) { // Write counter (GPU vblank counter?). 
@@ -1103,27 +1080,23 @@ bool CommandProcessor::ExecutePacketType3_EVENT_WRITE_SHD( address &= ~0x3; data_value = GpuSwap(data_value, endianness); poly::store(membase_ + GpuToCpu(address), data_value); + trace_writer_.WriteMemoryWrite(address, 4); return true; } bool CommandProcessor::ExecutePacketType3_EVENT_WRITE_EXT( - RingbufferReader* reader, uint32_t packet_ptr, uint32_t packet, - uint32_t count) { + RingbufferReader* reader, uint32_t packet, uint32_t count) { // generate a screen extent event - XETRACECP("[%.8X] Packet(%.8X): PM4_EVENT_WRITE_EXT", packet_ptr, packet); - reader->TraceData(count); uint32_t unk0 = reader->Read(); uint32_t unk1 = reader->Read(); return true; } bool CommandProcessor::ExecutePacketType3_DRAW_INDX(RingbufferReader* reader, - uint32_t packet_ptr, + uint32_t packet, uint32_t count) { // initiate fetch of index buffer and draw - XETRACECP("[%.8X] Packet(%.8X): PM4_DRAW_INDX", packet_ptr, packet); - reader->TraceData(count); // dword0 = viz query info uint32_t dword0 = reader->Read(); uint32_t dword1 = reader->Read(); @@ -1172,12 +1145,10 @@ bool CommandProcessor::ExecutePacketType3_DRAW_INDX(RingbufferReader* reader, } bool CommandProcessor::ExecutePacketType3_DRAW_INDX_2(RingbufferReader* reader, - uint32_t packet_ptr, + uint32_t packet, uint32_t count) { // draw using supplied indices in packet - XETRACECP("[%.8X] Packet(%.8X): PM4_DRAW_INDX_2", packet_ptr, packet); - reader->TraceData(count); uint32_t dword0 = reader->Read(); uint32_t index_count = dword0 >> 16; auto prim_type = static_cast(dword0 & 0x3F); @@ -1198,11 +1169,10 @@ bool CommandProcessor::ExecutePacketType3_DRAW_INDX_2(RingbufferReader* reader, } bool CommandProcessor::ExecutePacketType3_SET_CONSTANT(RingbufferReader* reader, - uint32_t packet_ptr, + uint32_t packet, uint32_t count) { // load constant into chip and to memory - XETRACECP("[%.8X] Packet(%.8X): PM4_SET_CONSTANT", packet_ptr, packet); // PM4_REG(reg) ((0x4 << 16) | (GSL_HAL_SUBBLOCK_OFFSET(reg))) // reg - 
0x2000 uint32_t offset_type = reader->Read(); @@ -1213,10 +1183,7 @@ bool CommandProcessor::ExecutePacketType3_SET_CONSTANT(RingbufferReader* reader, index += 0x2000; // registers for (uint32_t n = 0; n < count - 1; n++, index++) { uint32_t data = reader->Read(); - const char* reg_name = register_file_->GetRegisterName(index); - XETRACECP("[%.8X] %.8X -> %.4X %s", packet_ptr + (1 + n) * 4, data, - index, reg_name ? reg_name : ""); - WriteRegister(packet_ptr, index, data); + WriteRegister(index, data); } break; default: @@ -1227,10 +1194,8 @@ bool CommandProcessor::ExecutePacketType3_SET_CONSTANT(RingbufferReader* reader, } bool CommandProcessor::ExecutePacketType3_LOAD_ALU_CONSTANT( - RingbufferReader* reader, uint32_t packet_ptr, uint32_t packet, - uint32_t count) { + RingbufferReader* reader, uint32_t packet, uint32_t count) { // load constants from memory - XETRACECP("[%.8X] Packet(%.8X): PM4_LOAD_ALU_CONSTANT", packet_ptr, packet); uint32_t address = reader->Read(); address &= 0x3FFFFFFF; uint32_t offset_type = reader->Read(); @@ -1238,24 +1203,20 @@ bool CommandProcessor::ExecutePacketType3_LOAD_ALU_CONSTANT( uint32_t size = reader->Read(); size &= 0xFFF; index += 0x4000; // alu constants + trace_writer_.WriteMemoryRead(address, size * 4); for (uint32_t n = 0; n < size; n++, index++) { - uint32_t data = poly::load_and_swap( - membase_ + GpuToCpu(packet_ptr, address + n * 4)); - const char* reg_name = register_file_->GetRegisterName(index); - XETRACECP("[%.8X] %.8X -> %.4X %s", packet_ptr, data, index, - reg_name ? 
reg_name : ""); - WriteRegister(packet_ptr, index, data); + uint32_t data = + poly::load_and_swap(membase_ + GpuToCpu(address + n * 4)); + WriteRegister(index, data); } return true; } bool CommandProcessor::ExecutePacketType3_IM_LOAD(RingbufferReader* reader, - uint32_t packet_ptr, + uint32_t packet, uint32_t count) { // load sequencer instruction memory (pointer-based) - XETRACECP("[%.8X] Packet(%.8X): PM4_IM_LOAD", packet_ptr, packet); - reader->TraceData(count); uint32_t addr_type = reader->Read(); auto shader_type = static_cast(addr_type & 0x3); uint32_t addr = addr_type & ~0x3; @@ -1263,18 +1224,16 @@ bool CommandProcessor::ExecutePacketType3_IM_LOAD(RingbufferReader* reader, uint32_t start = start_size >> 16; uint32_t size_dwords = start_size & 0xFFFF; // dwords assert_true(start == 0); + trace_writer_.WriteMemoryRead(addr, size_dwords * 4); LoadShader(shader_type, - reinterpret_cast(membase_ + GpuToCpu(packet_ptr, addr)), + reinterpret_cast(membase_ + GpuToCpu(addr)), size_dwords); return true; } bool CommandProcessor::ExecutePacketType3_IM_LOAD_IMMEDIATE( - RingbufferReader* reader, uint32_t packet_ptr, uint32_t packet, - uint32_t count) { + RingbufferReader* reader, uint32_t packet, uint32_t count) { // load sequencer instruction memory (code embedded in packet) - XETRACECP("[%.8X] Packet(%.8X): PM4_IM_LOAD_IMMEDIATE", packet_ptr, packet); - reader->TraceData(count); uint32_t dword0 = reader->Read(); uint32_t dword1 = reader->Read(); auto shader_type = static_cast(dword0); @@ -1290,11 +1249,8 @@ bool CommandProcessor::ExecutePacketType3_IM_LOAD_IMMEDIATE( } bool CommandProcessor::ExecutePacketType3_INVALIDATE_STATE( - RingbufferReader* reader, uint32_t packet_ptr, uint32_t packet, - uint32_t count) { + RingbufferReader* reader, uint32_t packet, uint32_t count) { // selective invalidation of state pointers - XETRACECP("[%.8X] Packet(%.8X): PM4_INVALIDATE_STATE", packet_ptr, packet); - reader->TraceData(count); uint32_t mask = reader->Read(); // 
driver_->InvalidateState(mask); return true; @@ -1382,7 +1338,6 @@ bool CommandProcessor::IssueDraw() { // No framebuffer, so nothing we do will actually have an effect. // Treat it as a no-op. // TODO(benvanik): if we have a vs export, still allow it to go. - XETRACECP("No-op draw (no framebuffer set)"); draw_batcher_.DiscardDraw(); return true; } @@ -2066,6 +2021,7 @@ CommandProcessor::UpdateStatus CommandProcessor::PopulateIndexBuffer() { : sizeof(uint16_t)); auto allocation = scratch_buffer_.Acquire(total_size); + trace_writer_.WriteMemoryRead(info.guest_base, info.length); if (info.format == IndexFormat::kInt32) { auto dest = reinterpret_cast(allocation.host_ptr); auto src = reinterpret_cast(membase_ + info.guest_base); @@ -2125,6 +2081,8 @@ CommandProcessor::UpdateStatus CommandProcessor::PopulateVertexBuffers() { auto allocation = scratch_buffer_.Acquire(valid_range); + trace_writer_.WriteMemoryRead(fetch->address << 2, valid_range); + // Copy and byte swap the entire buffer. // We could be smart about this to save GPU bandwidth by building a CRC // as we copy and only if it differs from the previous value committing @@ -2236,6 +2194,9 @@ CommandProcessor::UpdateStatus CommandProcessor::PopulateSampler( return UpdateStatus::kCompatible; // invalid texture used } + trace_writer_.WriteMemoryRead(texture_info.guest_address, + texture_info.input_length); + auto entry_view = texture_cache_.Demand(texture_info, sampler_info); if (!entry_view) { // Unable to create/fetch/etc. 
diff --git a/src/xenia/gpu/gl4/command_processor.h b/src/xenia/gpu/gl4/command_processor.h index 769b581a3..3d47971be 100644 --- a/src/xenia/gpu/gl4/command_processor.h +++ b/src/xenia/gpu/gl4/command_processor.h @@ -23,6 +23,7 @@ #include "xenia/gpu/gl4/gl4_shader.h" #include "xenia/gpu/gl4/texture_cache.h" #include "xenia/gpu/register_file.h" +#include "xenia/gpu/tracing.h" #include "xenia/gpu/xenos.h" #include "xenia/memory.h" @@ -56,12 +57,18 @@ class CommandProcessor { bool Initialize(std::unique_ptr context); void Shutdown(); + void CallInThread(std::function fn); + + void BeginTracing(const std::wstring& root_path); + void EndTracing(); void InitializeRingBuffer(uint32_t ptr, uint32_t page_count); void EnableReadPointerWriteBack(uint32_t ptr, uint32_t block_size); void UpdateWritePointer(uint32_t value); + void ExecutePacket(uint32_t ptr, uint32_t count); + private: class RingbufferReader; @@ -109,7 +116,7 @@ class CommandProcessor { void ShutdownGL(); GLuint CreateGeometryProgram(const std::string& source); - void WriteRegister(uint32_t packet_ptr, uint32_t index, uint32_t value); + void WriteRegister(uint32_t index, uint32_t value); void MakeCoherent(); void PrepareForWait(); void ReturnFromWait(); @@ -117,63 +124,48 @@ class CommandProcessor { void ExecutePrimaryBuffer(uint32_t start_index, uint32_t end_index); void ExecuteIndirectBuffer(uint32_t ptr, uint32_t length); bool ExecutePacket(RingbufferReader* reader); - bool ExecutePacketType0(RingbufferReader* reader, uint32_t packet_ptr, - uint32_t packet); - bool ExecutePacketType1(RingbufferReader* reader, uint32_t packet_ptr, - uint32_t packet); - bool ExecutePacketType2(RingbufferReader* reader, uint32_t packet_ptr, - uint32_t packet); - bool ExecutePacketType3(RingbufferReader* reader, uint32_t packet_ptr, - uint32_t packet); - bool ExecutePacketType3_ME_INIT(RingbufferReader* reader, uint32_t packet_ptr, - uint32_t packet, uint32_t count); - bool ExecutePacketType3_NOP(RingbufferReader* reader, 
uint32_t packet_ptr, - uint32_t packet, uint32_t count); - bool ExecutePacketType3_INTERRUPT(RingbufferReader* reader, - uint32_t packet_ptr, uint32_t packet, + bool ExecutePacketType0(RingbufferReader* reader, uint32_t packet); + bool ExecutePacketType1(RingbufferReader* reader, uint32_t packet); + bool ExecutePacketType2(RingbufferReader* reader, uint32_t packet); + bool ExecutePacketType3(RingbufferReader* reader, uint32_t packet); + bool ExecutePacketType3_ME_INIT(RingbufferReader* reader, uint32_t packet, + uint32_t count); + bool ExecutePacketType3_NOP(RingbufferReader* reader, uint32_t packet, + uint32_t count); + bool ExecutePacketType3_INTERRUPT(RingbufferReader* reader, uint32_t packet, uint32_t count); - bool ExecutePacketType3_XE_SWAP(RingbufferReader* reader, uint32_t packet_ptr, - uint32_t packet, uint32_t count); + bool ExecutePacketType3_XE_SWAP(RingbufferReader* reader, uint32_t packet, + uint32_t count); bool ExecutePacketType3_INDIRECT_BUFFER(RingbufferReader* reader, - uint32_t packet_ptr, uint32_t packet, - uint32_t count); + uint32_t packet, uint32_t count); bool ExecutePacketType3_WAIT_REG_MEM(RingbufferReader* reader, - uint32_t packet_ptr, uint32_t packet, - uint32_t count); - bool ExecutePacketType3_REG_RMW(RingbufferReader* reader, uint32_t packet_ptr, - uint32_t packet, uint32_t count); - bool ExecutePacketType3_COND_WRITE(RingbufferReader* reader, - uint32_t packet_ptr, uint32_t packet, + uint32_t packet, uint32_t count); + bool ExecutePacketType3_REG_RMW(RingbufferReader* reader, uint32_t packet, + uint32_t count); + bool ExecutePacketType3_COND_WRITE(RingbufferReader* reader, uint32_t packet, uint32_t count); - bool ExecutePacketType3_EVENT_WRITE(RingbufferReader* reader, - uint32_t packet_ptr, uint32_t packet, + bool ExecutePacketType3_EVENT_WRITE(RingbufferReader* reader, uint32_t packet, uint32_t count); bool ExecutePacketType3_EVENT_WRITE_SHD(RingbufferReader* reader, - uint32_t packet_ptr, uint32_t packet, - uint32_t count); + 
uint32_t packet, uint32_t count); bool ExecutePacketType3_EVENT_WRITE_EXT(RingbufferReader* reader, - uint32_t packet_ptr, uint32_t packet, - uint32_t count); - bool ExecutePacketType3_DRAW_INDX(RingbufferReader* reader, - uint32_t packet_ptr, uint32_t packet, + uint32_t packet, uint32_t count); + bool ExecutePacketType3_DRAW_INDX(RingbufferReader* reader, uint32_t packet, uint32_t count); - bool ExecutePacketType3_DRAW_INDX_2(RingbufferReader* reader, - uint32_t packet_ptr, uint32_t packet, + bool ExecutePacketType3_DRAW_INDX_2(RingbufferReader* reader, uint32_t packet, uint32_t count); bool ExecutePacketType3_SET_CONSTANT(RingbufferReader* reader, - uint32_t packet_ptr, uint32_t packet, - uint32_t count); + uint32_t packet, uint32_t count); bool ExecutePacketType3_LOAD_ALU_CONSTANT(RingbufferReader* reader, - uint32_t packet_ptr, + uint32_t packet, uint32_t count); - bool ExecutePacketType3_IM_LOAD(RingbufferReader* reader, uint32_t packet_ptr, - uint32_t packet, uint32_t count); + bool ExecutePacketType3_IM_LOAD(RingbufferReader* reader, uint32_t packet, + uint32_t count); bool ExecutePacketType3_IM_LOAD_IMMEDIATE(RingbufferReader* reader, - uint32_t packet_ptr, + uint32_t packet, uint32_t count); bool ExecutePacketType3_INVALIDATE_STATE(RingbufferReader* reader, - uint32_t packet_ptr, uint32_t packet, - uint32_t count); + uint32_t packet, uint32_t count); bool LoadShader(ShaderType shader_type, const uint32_t* address, uint32_t dword_count); @@ -206,10 +198,14 @@ class CommandProcessor { GL4GraphicsSystem* graphics_system_; RegisterFile* register_file_; + TraceWriter trace_writer_; + std::thread worker_thread_; std::atomic worker_running_; std::unique_ptr context_; SwapHandler swap_handler_; + std::function pending_fn_; + HANDLE pending_fn_event_; uint64_t time_base_; uint32_t counter_; diff --git a/src/xenia/gpu/gl4/gl4_gpu.cc b/src/xenia/gpu/gl4/gl4_gpu.cc index bcbba56ea..a4c2e1bf1 100644 --- a/src/xenia/gpu/gl4/gl4_gpu.cc +++ b/src/xenia/gpu/gl4/gl4_gpu.cc 
@@ -47,9 +47,9 @@ void InitializeIfNeeded() { void CleanupOnShutdown() {} -std::unique_ptr Create(Emulator* emulator) { +std::unique_ptr Create() { InitializeIfNeeded(); - return std::make_unique(emulator); + return std::make_unique(); } } // namespace gl4 diff --git a/src/xenia/gpu/gl4/gl4_gpu.h b/src/xenia/gpu/gl4/gl4_gpu.h index c6144cec4..67512bdda 100644 --- a/src/xenia/gpu/gl4/gl4_gpu.h +++ b/src/xenia/gpu/gl4/gl4_gpu.h @@ -19,7 +19,7 @@ namespace xe { namespace gpu { namespace gl4 { -std::unique_ptr Create(Emulator* emulator); +std::unique_ptr Create(); } // namespace gl4 } // namespace gpu diff --git a/src/xenia/gpu/gl4/gl4_graphics_system.cc b/src/xenia/gpu/gl4/gl4_graphics_system.cc index 24bd0527a..f5913d1fe 100644 --- a/src/xenia/gpu/gl4/gl4_graphics_system.cc +++ b/src/xenia/gpu/gl4/gl4_graphics_system.cc @@ -14,6 +14,7 @@ #include "xenia/gpu/gl4/gl4_gpu-private.h" #include "xenia/gpu/gl4/gl4_profiler_display.h" #include "xenia/gpu/gpu-private.h" +#include "xenia/gpu/tracing.h" namespace xe { namespace gpu { @@ -21,13 +22,15 @@ namespace gl4 { extern "C" GLEWContext* glewGetContext(); -GL4GraphicsSystem::GL4GraphicsSystem(Emulator* emulator) - : GraphicsSystem(emulator), timer_queue_(nullptr), vsync_timer_(nullptr) {} +GL4GraphicsSystem::GL4GraphicsSystem() + : GraphicsSystem(), timer_queue_(nullptr), vsync_timer_(nullptr) {} GL4GraphicsSystem::~GL4GraphicsSystem() = default; -X_STATUS GL4GraphicsSystem::Setup() { - auto result = GraphicsSystem::Setup(); +X_STATUS GL4GraphicsSystem::Setup(cpu::Processor* processor, + ui::PlatformLoop* target_loop, + ui::PlatformWindow* target_window) { + auto result = GraphicsSystem::Setup(processor, target_loop, target_window); if (result) { return result; } @@ -35,14 +38,13 @@ X_STATUS GL4GraphicsSystem::Setup() { // Create rendering control. // This must happen on the UI thread. 
poly::threading::Fence control_ready_fence; - auto loop = emulator_->main_window()->loop(); std::unique_ptr processor_context; - loop->Post([&]() { + target_loop_->Post([&]() { // Setup the GL control that actually does the drawing. // We run here in the loop and only touch it (and its context) on this // thread. That means some sync-fu when we want to swap. - control_ = std::make_unique(loop); - emulator_->main_window()->AddChild(control_.get()); + control_ = std::make_unique(target_loop_); + target_window_->AddChild(control_.get()); // Setup the GL context the command processor will do all its drawing in. // It's shared with the control context so that we can resolve framebuffers @@ -70,8 +72,12 @@ X_STATUS GL4GraphicsSystem::Setup() { command_processor_->set_swap_handler( [this](const SwapParameters& swap_params) { SwapHandler(swap_params); }); + if (!FLAGS_trace_gpu.empty()) { + command_processor_->BeginTracing(poly::to_wstring(FLAGS_trace_gpu)); + } + // Let the processor know we want register access callbacks. 
- emulator_->memory()->AddMappedRange( + memory_->AddMappedRange( 0x7FC80000, 0xFFFF0000, 0x0000FFFF, this, reinterpret_cast(MMIOReadRegisterThunk), reinterpret_cast(MMIOWriteRegisterThunk)); @@ -91,6 +97,8 @@ X_STATUS GL4GraphicsSystem::Setup() { } void GL4GraphicsSystem::Shutdown() { + command_processor_->EndTracing(); + DeleteTimerQueueTimer(timer_queue_, vsync_timer_, nullptr); DeleteTimerQueue(timer_queue_); @@ -114,6 +122,101 @@ void GL4GraphicsSystem::EnableReadPointerWriteBack(uint32_t ptr, command_processor_->EnableReadPointerWriteBack(ptr, block_size); } +const uint8_t* GL4GraphicsSystem::PlayTrace(const uint8_t* trace_data, + size_t trace_size, + TracePlaybackMode playback_mode) { + auto trace_ptr = trace_data; + command_processor_->CallInThread([&]() { + bool pending_break = false; + const PacketStartCommand* pending_packet = nullptr; + while (trace_ptr < trace_data + trace_size) { + auto type = + static_cast(poly::load(trace_ptr)); + switch (type) { + case TraceCommandType::kPrimaryBufferStart: { + auto cmd = + reinterpret_cast(trace_ptr); + // + trace_ptr += sizeof(*cmd) + cmd->count * 4; + break; + } + case TraceCommandType::kPrimaryBufferEnd: { + auto cmd = + reinterpret_cast(trace_ptr); + // + trace_ptr += sizeof(*cmd); + break; + } + case TraceCommandType::kIndirectBufferStart: { + auto cmd = + reinterpret_cast(trace_ptr); + // + trace_ptr += sizeof(*cmd) + cmd->count * 4; + break; + } + case TraceCommandType::kIndirectBufferEnd: { + auto cmd = + reinterpret_cast(trace_ptr); + // + trace_ptr += sizeof(*cmd); + break; + } + case TraceCommandType::kPacketStart: { + auto cmd = reinterpret_cast(trace_ptr); + trace_ptr += sizeof(*cmd); + std::memcpy(memory()->Translate(cmd->base_ptr), trace_ptr, + cmd->count * 4); + trace_ptr += cmd->count * 4; + pending_packet = cmd; + break; + } + case TraceCommandType::kPacketEnd: { + auto cmd = reinterpret_cast(trace_ptr); + trace_ptr += sizeof(*cmd); + if (pending_packet) { + 
command_processor_->ExecutePacket(pending_packet->base_ptr, + pending_packet->count); + pending_packet = nullptr; + } + if (pending_break) { + return; + } + break; + } + case TraceCommandType::kMemoryRead: { + auto cmd = reinterpret_cast(trace_ptr); + trace_ptr += sizeof(*cmd); + std::memcpy(memory()->Translate(cmd->base_ptr), trace_ptr, + cmd->length); + trace_ptr += cmd->length; + break; + } + case TraceCommandType::kMemoryWrite: { + auto cmd = reinterpret_cast(trace_ptr); + trace_ptr += sizeof(*cmd); + // ? + trace_ptr += cmd->length; + break; + } + case TraceCommandType::kEvent: { + auto cmd = reinterpret_cast(trace_ptr); + trace_ptr += sizeof(*cmd); + switch (cmd->event_type) { + case EventType::kSwap: { + if (playback_mode == TracePlaybackMode::kBreakOnSwap) { + pending_break = true; + } + break; + } + } + break; + } + } + } + }); + return trace_ptr; +} + void GL4GraphicsSystem::MarkVblank() { static bool thread_name_set = false; if (!thread_name_set) { @@ -147,9 +250,6 @@ void GL4GraphicsSystem::SwapHandler(const SwapParameters& swap_params) { uint64_t GL4GraphicsSystem::ReadRegister(uint64_t addr) { uint32_t r = addr & 0xFFFF; - if (FLAGS_trace_ring_buffer) { - XELOGGPU("ReadRegister(%.4X)", r); - } switch (r) { case 0x3C00: // ? 
@@ -170,9 +270,6 @@ uint64_t GL4GraphicsSystem::ReadRegister(uint64_t addr) { void GL4GraphicsSystem::WriteRegister(uint64_t addr, uint64_t value) { uint32_t r = addr & 0xFFFF; - if (FLAGS_trace_ring_buffer) { - XELOGGPU("WriteRegister(%.4X, %.8X)", r, value); - } switch (r) { case 0x0714: // CP_RB_WPTR diff --git a/src/xenia/gpu/gl4/gl4_graphics_system.h b/src/xenia/gpu/gl4/gl4_graphics_system.h index 6a6d028d8..77b144da0 100644 --- a/src/xenia/gpu/gl4/gl4_graphics_system.h +++ b/src/xenia/gpu/gl4/gl4_graphics_system.h @@ -24,10 +24,11 @@ namespace gl4 { class GL4GraphicsSystem : public GraphicsSystem { public: - GL4GraphicsSystem(Emulator* emulator); + GL4GraphicsSystem(); ~GL4GraphicsSystem() override; - X_STATUS Setup() override; + X_STATUS Setup(cpu::Processor* processor, ui::PlatformLoop* target_loop, + ui::PlatformWindow* target_window) override; void Shutdown() override; RegisterFile* register_file() { return ®ister_file_; } @@ -35,6 +36,9 @@ class GL4GraphicsSystem : public GraphicsSystem { void InitializeRingBuffer(uint32_t ptr, uint32_t page_count) override; void EnableReadPointerWriteBack(uint32_t ptr, uint32_t block_size) override; + const uint8_t* PlayTrace(const uint8_t* trace_data, size_t trace_size, + TracePlaybackMode playback_mode) override; + private: void MarkVblank(); void SwapHandler(const SwapParameters& swap_params); diff --git a/src/xenia/gpu/gpu-private.h b/src/xenia/gpu/gpu-private.h index d51698c4a..58f6c81e7 100644 --- a/src/xenia/gpu/gpu-private.h +++ b/src/xenia/gpu/gpu-private.h @@ -14,7 +14,8 @@ DECLARE_string(gpu); -DECLARE_bool(trace_ring_buffer); +DECLARE_string(trace_gpu); + DECLARE_string(dump_shaders); DECLARE_bool(vsync); diff --git a/src/xenia/gpu/gpu.cc b/src/xenia/gpu/gpu.cc index ec0037f64..41c545fcf 100644 --- a/src/xenia/gpu/gpu.cc +++ b/src/xenia/gpu/gpu.cc @@ -15,7 +15,8 @@ DEFINE_string(gpu, "any", "Graphics system. 
Use: [any, gl4]"); -DEFINE_bool(trace_ring_buffer, false, "Trace GPU ring buffer packets."); +DEFINE_string(trace_gpu, "", "Trace GPU data to the given root path."); + DEFINE_string(dump_shaders, "", "Path to write GPU shaders to as they are compiled."); @@ -24,14 +25,14 @@ DEFINE_bool(vsync, true, "Enable VSYNC."); namespace xe { namespace gpu { -std::unique_ptr Create(Emulator* emulator) { +std::unique_ptr Create() { if (FLAGS_gpu.compare("gl4") == 0) { - return xe::gpu::gl4::Create(emulator); + return xe::gpu::gl4::Create(); } else { // Create best available. std::unique_ptr best; - best = xe::gpu::gl4::Create(emulator); + best = xe::gpu::gl4::Create(); if (best) { return best; } diff --git a/src/xenia/gpu/gpu.h b/src/xenia/gpu/gpu.h index 88541f2ca..912e05406 100644 --- a/src/xenia/gpu/gpu.h +++ b/src/xenia/gpu/gpu.h @@ -21,9 +21,9 @@ class Emulator; namespace xe { namespace gpu { -std::unique_ptr Create(Emulator* emulator); +std::unique_ptr Create(); -std::unique_ptr CreateGL4(Emulator* emulator); +std::unique_ptr CreateGL4(); } // namespace gpu } // namespace xe diff --git a/src/xenia/gpu/graphics_system.cc b/src/xenia/gpu/graphics_system.cc index b8528b7e8..e6803eb1c 100644 --- a/src/xenia/gpu/graphics_system.cc +++ b/src/xenia/gpu/graphics_system.cc @@ -10,23 +10,29 @@ #include "xenia/gpu/graphics_system.h" #include "poly/poly.h" -#include "xenia/emulator.h" #include "xenia/cpu/processor.h" #include "xenia/gpu/gpu-private.h" namespace xe { namespace gpu { -GraphicsSystem::GraphicsSystem(Emulator* emulator) - : emulator_(emulator), - memory_(emulator->memory()), +GraphicsSystem::GraphicsSystem() + : memory_(nullptr), + processor_(nullptr), + target_loop_(nullptr), + target_window_(nullptr), interrupt_callback_(0), interrupt_callback_data_(0) {} -GraphicsSystem::~GraphicsSystem() {} +GraphicsSystem::~GraphicsSystem() = default; -X_STATUS GraphicsSystem::Setup() { - processor_ = emulator_->processor(); +X_STATUS GraphicsSystem::Setup(cpu::Processor* processor, 
+ ui::PlatformLoop* target_loop, + ui::PlatformWindow* target_window) { + processor_ = processor; + memory_ = processor->memory(); + target_loop_ = target_loop; + target_window_ = target_window; return X_STATUS_SUCCESS; } diff --git a/src/xenia/gpu/graphics_system.h b/src/xenia/gpu/graphics_system.h index 7c430b412..d5b588a70 100644 --- a/src/xenia/gpu/graphics_system.h +++ b/src/xenia/gpu/graphics_system.h @@ -14,7 +14,9 @@ #include #include "xenia/common.h" -#include "xenia/emulator.h" +#include "xenia/cpu/processor.h" +#include "xenia/memory.h" +#include "xenia/ui/main_window.h" #include "xenia/xbox.h" namespace xe { @@ -24,25 +26,37 @@ class GraphicsSystem { public: virtual ~GraphicsSystem(); - Emulator* emulator() const { return emulator_; } Memory* memory() const { return memory_; } cpu::Processor* processor() const { return processor_; } - virtual X_STATUS Setup(); + virtual X_STATUS Setup(cpu::Processor* processor, + ui::PlatformLoop* target_loop, + ui::PlatformWindow* target_window); virtual void Shutdown(); void SetInterruptCallback(uint32_t callback, uint32_t user_data); virtual void InitializeRingBuffer(uint32_t ptr, uint32_t page_count) = 0; - virtual void EnableReadPointerWriteBack(uint32_t ptr, uint32_t block_size) = 0; + virtual void EnableReadPointerWriteBack(uint32_t ptr, + uint32_t block_size) = 0; void DispatchInterruptCallback(uint32_t source, uint32_t cpu); - protected: - GraphicsSystem(Emulator* emulator); + enum class TracePlaybackMode { + kUntilEnd, + kBreakOnSwap, + }; + virtual const uint8_t* PlayTrace(const uint8_t* trace_data, size_t trace_size, + TracePlaybackMode playback_mode) { + return nullptr; + } + + protected: + GraphicsSystem(); - Emulator* emulator_; Memory* memory_; cpu::Processor* processor_; + ui::PlatformLoop* target_loop_; + ui::PlatformWindow* target_window_; uint32_t interrupt_callback_; uint32_t interrupt_callback_data_; diff --git a/src/xenia/gpu/sources.gypi b/src/xenia/gpu/sources.gypi index ec144c8af..02d227a31 
100644 --- a/src/xenia/gpu/sources.gypi +++ b/src/xenia/gpu/sources.gypi @@ -15,6 +15,7 @@ 'shader.h', 'texture_info.cc', 'texture_info.h', + 'tracing.h', 'ucode.h', 'ucode_disassembler.cc', 'ucode_disassembler.h', diff --git a/src/xenia/gpu/trace_viewer_main.cc b/src/xenia/gpu/trace_viewer_main.cc new file mode 100644 index 000000000..63c3d476c --- /dev/null +++ b/src/xenia/gpu/trace_viewer_main.cc @@ -0,0 +1,73 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2015 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. * + ****************************************************************************** + */ + +#include +#include "poly/main.h" +#include "poly/mapped_memory.h" +#include "xenia/gpu/graphics_system.h" +#include "xenia/gpu/tracing.h" +#include "xenia/emulator.h" +#include "xenia/ui/main_window.h" + +DEFINE_string(target_trace_file, "", "Specifies the trace file to load."); + +namespace xe { +namespace gpu { + +int trace_viewer_main(std::vector& args) { + // Create the emulator. + auto emulator = std::make_unique(L""); + X_STATUS result = emulator->Setup(); + if (XFAILED(result)) { + XELOGE("Failed to setup emulator: %.8X", result); + return 1; + } + + // Grab path from the flag or unnamed argument. + if (!FLAGS_target_trace_file.empty() || args.size() >= 2) { + std::wstring path; + if (!FLAGS_target_trace_file.empty()) { + // Passed as a named argument. + // TODO(benvanik): find something better than gflags that supports + // unicode. + path = poly::to_wstring(FLAGS_target_trace_file); + } else { + // Passed as an unnamed argument. + path = args[1]; + } + // Normalize the path and make absolute. + std::wstring abs_path = poly::to_absolute_path(path); + + // TODO(benvanik): UI? replay control on graphics system? 
+ auto graphics_system = emulator->graphics_system(); + auto mmap = + poly::MappedMemory::Open(abs_path, poly::MappedMemory::Mode::kRead); + auto trace_data = reinterpret_cast(mmap->data()); + auto trace_size = mmap->size(); + + auto trace_ptr = trace_data; + while (trace_ptr < trace_data + trace_size) { + trace_ptr = graphics_system->PlayTrace( + trace_ptr, trace_size - (trace_ptr - trace_data), + GraphicsSystem::TracePlaybackMode::kBreakOnSwap); + } + + // Wait until we are exited. + emulator->main_window()->loop()->AwaitQuit(); + } + + emulator.reset(); + return 0; +} + +} // namespace gpu +} // namespace xe + +DEFINE_ENTRY_POINT(L"gpu_trace_viewer", L"gpu_trace_viewer some.trace", + xe::gpu::trace_viewer_main); diff --git a/src/xenia/gpu/tracing.h b/src/xenia/gpu/tracing.h new file mode 100644 index 000000000..e1919e4e3 --- /dev/null +++ b/src/xenia/gpu/tracing.h @@ -0,0 +1,211 @@ +/** + ****************************************************************************** + * Xenia : Xbox 360 Emulator Research Project * + ****************************************************************************** + * Copyright 2015 Ben Vanik. All rights reserved. * + * Released under the BSD license - see LICENSE in the root for more details. 
namespace xe {
namespace gpu {

// Tag stored at the start of every record in a GPU trace file.
enum class TraceCommandType : uint32_t {
  kPrimaryBufferStart,
  kPrimaryBufferEnd,
  kIndirectBufferStart,
  kIndirectBufferEnd,
  kPacketStart,
  kPacketEnd,
  kMemoryRead,
  kMemoryWrite,
  kEvent,
};

// Record headers, written to the file exactly as laid out in memory.
// For the *Start records TraceWriter follows the header with |count| 32-bit
// words copied from guest memory at |base_ptr|.
struct PrimaryBufferStartCommand {
  TraceCommandType type;
  uint32_t base_ptr;
  uint32_t count;
};

struct PrimaryBufferEndCommand {
  TraceCommandType type;
};

struct IndirectBufferStartCommand {
  TraceCommandType type;
  uint32_t base_ptr;
  uint32_t count;
};

struct IndirectBufferEndCommand {
  TraceCommandType type;
};

struct PacketStartCommand {
  TraceCommandType type;
  uint32_t base_ptr;
  uint32_t count;
};

struct PacketEndCommand {
  TraceCommandType type;
};

// For the memory records TraceWriter follows the header with |length| bytes
// copied from guest memory at |base_ptr|.
struct MemoryReadCommand {
  TraceCommandType type;
  uint32_t base_ptr;
  uint32_t length;
};

struct MemoryWriteCommand {
  TraceCommandType type;
  uint32_t base_ptr;
  uint32_t length;
};

enum class EventType {
  kSwap,
};

struct EventCommand {
  TraceCommandType type;
  EventType event_type;
};

// Appends GPU trace records to a binary file.
//
// |membase| is the host pointer that guest addresses are offset from; record
// payloads are read from membase + base_ptr. Every Write* call is a no-op
// while no file is open, so callers can emit records unconditionally.
//
// The writer owns its FILE*: it is closed by Close() or by the destructor, and
// the class is non-copyable so the handle can never be closed twice.
class TraceWriter {
 public:
  explicit TraceWriter(uint8_t* membase) : membase_(membase), file_(nullptr) {}
  ~TraceWriter() { Close(); }

  TraceWriter(const TraceWriter&) = delete;
  TraceWriter& operator=(const TraceWriter&) = delete;

  // Opens (truncating) |path| for writing, closing any file already open.
  // Returns false if the file could not be opened.
  bool Open(const std::wstring& path) {
    Close();
#if defined(_WIN32)
    file_ = _wfopen(path.c_str(), L"wb");
#else
    // _wfopen is MSVC-specific; elsewhere hand fopen a UTF-8 path.
    file_ = fopen(ToUtf8(path).c_str(), "wb");
#endif
    return file_ != nullptr;
  }

  // Pushes buffered records to the OS without closing the file.
  void Flush() {
    if (file_) {
      fflush(file_);
    }
  }

  // Flushes and closes the file, if one is open. Safe to call repeatedly.
  void Close() {
    if (file_) {
      fflush(file_);
      fclose(file_);
      file_ = nullptr;
    }
  }

  // Records the start of a primary buffer: header + |count| 32-bit words.
  void WritePrimaryBufferStart(uint32_t base_ptr, uint32_t count) {
    if (!file_) {
      return;
    }
    const PrimaryBufferStartCommand cmd{TraceCommandType::kPrimaryBufferStart,
                                        base_ptr, count};
    WriteHeader(&cmd, sizeof(cmd));
    WriteGuestBytes(base_ptr, size_t(count) * 4);
  }

  void WritePrimaryBufferEnd() {
    if (!file_) {
      return;
    }
    const PrimaryBufferEndCommand cmd{TraceCommandType::kPrimaryBufferEnd};
    WriteHeader(&cmd, sizeof(cmd));
  }

  // Records the start of an indirect buffer: header + |count| 32-bit words.
  void WriteIndirectBufferStart(uint32_t base_ptr, uint32_t count) {
    if (!file_) {
      return;
    }
    const IndirectBufferStartCommand cmd{
        TraceCommandType::kIndirectBufferStart, base_ptr, count};
    WriteHeader(&cmd, sizeof(cmd));
    WriteGuestBytes(base_ptr, size_t(count) * 4);
  }

  void WriteIndirectBufferEnd() {
    if (!file_) {
      return;
    }
    const IndirectBufferEndCommand cmd{TraceCommandType::kIndirectBufferEnd};
    WriteHeader(&cmd, sizeof(cmd));
  }

  // Records the start of a packet: header + |count| 32-bit words.
  void WritePacketStart(uint32_t base_ptr, uint32_t count) {
    if (!file_) {
      return;
    }
    const PacketStartCommand cmd{TraceCommandType::kPacketStart, base_ptr,
                                 count};
    WriteHeader(&cmd, sizeof(cmd));
    WriteGuestBytes(base_ptr, size_t(count) * 4);
  }

  void WritePacketEnd() {
    if (!file_) {
      return;
    }
    const PacketEndCommand cmd{TraceCommandType::kPacketEnd};
    WriteHeader(&cmd, sizeof(cmd));
  }

  // Records |length| bytes of guest memory starting at |base_ptr|.
  // The header stores the length as 32 bits, so exactly that many bytes are
  // written; header and payload can never disagree.
  void WriteMemoryRead(uint32_t base_ptr, size_t length) {
    if (!file_) {
      return;
    }
    const uint32_t length32 = static_cast<uint32_t>(length);
    const MemoryReadCommand cmd{TraceCommandType::kMemoryRead, base_ptr,
                                length32};
    WriteHeader(&cmd, sizeof(cmd));
    WriteGuestBytes(base_ptr, length32);
  }

  // Same layout as WriteMemoryRead, tagged as a write.
  void WriteMemoryWrite(uint32_t base_ptr, size_t length) {
    if (!file_) {
      return;
    }
    const uint32_t length32 = static_cast<uint32_t>(length);
    const MemoryWriteCommand cmd{TraceCommandType::kMemoryWrite, base_ptr,
                                 length32};
    WriteHeader(&cmd, sizeof(cmd));
    WriteGuestBytes(base_ptr, length32);
  }

  void WriteEvent(EventType event_type) {
    if (!file_) {
      return;
    }
    const EventCommand cmd{TraceCommandType::kEvent, event_type};
    WriteHeader(&cmd, sizeof(cmd));
  }

 private:
#if !defined(_WIN32)
  // Encodes |wide| as UTF-8.
  // NOTE(review): assumes each wchar_t holds one Unicode code point (true
  // where wchar_t is 32-bit); confirm for any other non-Windows target.
  static std::string ToUtf8(const std::wstring& wide) {
    std::string utf8;
    utf8.reserve(wide.size());
    for (wchar_t wc : wide) {
      const uint32_t cp = static_cast<uint32_t>(wc);
      if (cp < 0x80) {
        utf8.push_back(static_cast<char>(cp));
      } else if (cp < 0x800) {
        utf8.push_back(static_cast<char>(0xC0 | (cp >> 6)));
        utf8.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
      } else if (cp < 0x10000) {
        utf8.push_back(static_cast<char>(0xE0 | (cp >> 12)));
        utf8.push_back(static_cast<char>(0x80 | ((cp >> 6) & 0x3F)));
        utf8.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
      } else {
        utf8.push_back(static_cast<char>(0xF0 | (cp >> 18)));
        utf8.push_back(static_cast<char>(0x80 | ((cp >> 12) & 0x3F)));
        utf8.push_back(static_cast<char>(0x80 | ((cp >> 6) & 0x3F)));
        utf8.push_back(static_cast<char>(0x80 | (cp & 0x3F)));
      }
    }
    return utf8;
  }
#endif

  // Writes a record header verbatim. Callers have already checked file_.
  void WriteHeader(const void* header, size_t header_size) {
    fwrite(header, 1, header_size, file_);
  }

  // Writes |byte_count| bytes of guest memory starting at |base_ptr|.
  // Callers have already checked file_.
  void WriteGuestBytes(uint32_t base_ptr, size_t byte_count) {
    if (byte_count) {
      fwrite(membase_ + base_ptr, 1, byte_count, file_);
    }
  }

  uint8_t* membase_;
  FILE* file_;
};

}  // namespace gpu
}  // namespace xe
a/src/xenia/gpu/xenos.h b/src/xenia/gpu/xenos.h index 91c031dd7..c7f7c5e79 100644 --- a/src/xenia/gpu/xenos.h +++ b/src/xenia/gpu/xenos.h @@ -234,15 +234,6 @@ inline uint32_t GpuToCpu(uint32_t p) { return p; } -inline uint32_t GpuToCpu(uint32_t base, uint32_t p) { - // Some AMD docs say relative to base ptr, some say just this. - // Some games use some crazy shift magic, but it seems to nop. - uint32_t upper = 0;//base & 0xFF000000; - //uint32_t lower = p & 0x01FFFFFF; - uint32_t lower = p; - return upper + lower;// -(((base >> 20) + 0x200) & 0x1000); -} - // XE_GPU_REG_SQ_PROGRAM_CNTL typedef union { XEPACKEDSTRUCTANONYMOUS({ diff --git a/src/xenia/kernel/fs/devices/disc_image_device.cc b/src/xenia/kernel/fs/devices/disc_image_device.cc index fa6510dd3..11ab1dfe7 100644 --- a/src/xenia/kernel/fs/devices/disc_image_device.cc +++ b/src/xenia/kernel/fs/devices/disc_image_device.cc @@ -24,7 +24,8 @@ DiscImageDevice::DiscImageDevice(const std::string& path, DiscImageDevice::~DiscImageDevice() { delete gdfx_; } int DiscImageDevice::Init() { - mmap_ = poly::MappedMemory::Open(local_path_, poly::MappedMemory::Mode::READ); + mmap_ = + poly::MappedMemory::Open(local_path_, poly::MappedMemory::Mode::kRead); if (!mmap_) { XELOGE("Disc image could not be mapped"); return 1; diff --git a/src/xenia/kernel/fs/devices/host_path_entry.cc b/src/xenia/kernel/fs/devices/host_path_entry.cc index 32a0de4d2..f8257fd63 100644 --- a/src/xenia/kernel/fs/devices/host_path_entry.cc +++ b/src/xenia/kernel/fs/devices/host_path_entry.cc @@ -125,8 +125,8 @@ std::unique_ptr HostPathEntry::CreateMemoryMapping( Mode map_mode, const size_t offset, const size_t length) { auto mmap = poly::MappedMemory::Open( local_path_, - map_mode == Mode::READ ? poly::MappedMemory::Mode::READ - : poly::MappedMemory::Mode::READ_WRITE, + map_mode == Mode::READ ? 
poly::MappedMemory::Mode::kRead + : poly::MappedMemory::Mode::kReadWrite, offset, length); if (!mmap) { return nullptr; diff --git a/src/xenia/kernel/fs/devices/stfs_container_device.cc b/src/xenia/kernel/fs/devices/stfs_container_device.cc index 0951fd683..57ff17d7e 100644 --- a/src/xenia/kernel/fs/devices/stfs_container_device.cc +++ b/src/xenia/kernel/fs/devices/stfs_container_device.cc @@ -25,7 +25,8 @@ STFSContainerDevice::STFSContainerDevice(const std::string& path, STFSContainerDevice::~STFSContainerDevice() { delete stfs_; } int STFSContainerDevice::Init() { - mmap_ = poly::MappedMemory::Open(local_path_, poly::MappedMemory::Mode::READ); + mmap_ = + poly::MappedMemory::Open(local_path_, poly::MappedMemory::Mode::kRead); if (!mmap_) { XELOGE("STFS container could not be mapped"); return 1; diff --git a/src/xenia/xenia_main.cc b/src/xenia/xenia_main.cc index eb2e07b99..2962f1733 100644 --- a/src/xenia/xenia_main.cc +++ b/src/xenia/xenia_main.cc @@ -30,9 +30,9 @@ int xenia_main(std::vector& args) { } // Grab path from the flag or unnamed argument. - if (FLAGS_target.size() || args.size() >= 2) { + if (!FLAGS_target.empty() || args.size() >= 2) { std::wstring path; - if (FLAGS_target.size()) { + if (!FLAGS_target.empty()) { // Passed as a named argument. // TODO(benvanik): find something better than gflags that supports // unicode. @@ -49,10 +49,10 @@ int xenia_main(std::vector& args) { XELOGE("Failed to launch target: %.8X", result); return 1; } - } - // Wait until we are exited. - emulator->main_window()->loop()->AwaitQuit(); + // Wait until we are exited. 
+ emulator->main_window()->loop()->AwaitQuit(); + } emulator.reset(); Profiler::Dump(); diff --git a/xenia.gyp b/xenia.gyp index 7c241be81..fee673afd 100644 --- a/xenia.gyp +++ b/xenia.gyp @@ -470,5 +470,28 @@ 'src/xenia/xenia_main.cc', ], }, + + { + 'target_name': 'gpu-trace-viewer', + 'type': 'executable', + + 'msvs_settings': { + 'VCLinkerTool': { + 'SubSystem': '2' + }, + }, + + 'dependencies': [ + 'libxenia', + ], + + 'include_dirs': [ + '.', + ], + + 'sources': [ + 'src/xenia/gpu/trace_viewer_main.cc', + ], + }, ], }