diff --git a/src/xenia/gpu/ring_buffer_worker.cc b/src/xenia/gpu/ring_buffer_worker.cc index 2c15e2513..65bdb77ef 100644 --- a/src/xenia/gpu/ring_buffer_worker.cc +++ b/src/xenia/gpu/ring_buffer_worker.cc @@ -41,7 +41,10 @@ void RingBufferWorker::Initialize(GraphicsDriver* driver, uint32_t ptr, uint32_t page_count) { driver_ = driver; primary_buffer_ptr_ = ptr; - primary_buffer_size_ = page_count * 4 * 1024; + // Not sure this is correct, but it's a way to take the page_count back to + // the number of bytes allocated by the physical alloc. + uint32_t original_size = 1 << (0x1C - page_count - 1); + primary_buffer_size_ = original_size; read_ptr_index_ = 0; } @@ -86,9 +89,19 @@ void RingBufferWorker::Pump() { // Process the new commands. XELOGGPU("Ring buffer thread work"); - // TODO(benvanik): handle wrapping around - // read_ptr_index_ = (read_ptr_index_ + 1) % (primary_buffer_size_ / 4); - XEASSERT(write_ptr_index > read_ptr_index_); + // Handle wrapping around. + // TODO(benvanik): verify this is correct. + if (write_ptr_index < read_ptr_index_) { + // We wrapped. Execute all instructions until the end and go back to 0. 
+ XELOGGPU("Ring buffer wrapped back to zero (read %0.8X, write %0.8X)", + read_ptr_index_, write_ptr_index); + uint32_t pre_length = (primary_buffer_size_ / 4) - read_ptr_index_; + if (pre_length) { + ExecuteSegment(primary_buffer_ptr_ + read_ptr_index_ * 4, pre_length); + } + read_ptr_index_ = 0; + } + uint32_t length = write_ptr_index - read_ptr_index_; if (length) { ExecuteSegment(primary_buffer_ptr_ + read_ptr_index_ * 4, length); @@ -114,12 +127,14 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) { #define LOG_DATA(count) \ for (uint32_t __m = 0; __m < count; __m++) { \ - XELOGGPU(" %.8X", XEGETUINT32BE(packet_base + 1 * 4 + __m * 4)); \ + XELOGGPU("[%.8X] %.8X", \ + ptr + (n + 1 + __m) * 4, \ + XEGETUINT32BE(packet_base + 1 * 4 + __m * 4)); \ } #define TRANSLATE_ADDR(p) \ ((p & ~0x3) + (primary_buffer_ptr_ & ~0x1FFFFFFF)) - XELOGGPU("CommandList(%.8X): executing %dw", ptr, length); + XELOGGPU("[%.8X] CommandList(): executing %dw", ptr, length); // Execute commands! for (uint32_t n = 0; n < length;) { @@ -138,14 +153,16 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) { // Type-0 packet. // Write count registers in sequence to the registers starting at // (base_index << 2). - XELOGGPU("Packet(%.8X): set registers:", packet); + XELOGGPU("[%.8X] Packet(%.8X): set registers:", + ptr + n * 4, packet); uint32_t count = ((packet >> 16) & 0x3FFF) + 1; uint32_t base_index = (packet & 0xFFFF); for (uint32_t m = 0; m < count; m++) { uint32_t reg_data = XEGETUINT32BE(packet_base + 1 * 4 + m * 4); const char* reg_name = xenos::GetRegisterName(base_index + m); - XELOGGPU(" %.8X -> %.4X %s", reg_data, base_index + m, - reg_name ? reg_name : ""); + XELOGGPU("[%.8X] %.8X -> %.4X %s", + ptr + (n + 1 + m) * 4, + reg_data, base_index + m, reg_name ? reg_name : ""); // TODO(benvanik): exec write handler (if special). 
if (base_index + m < kXEGpuRegisterCount) { regs->values[base_index + m].u32 = reg_data; @@ -158,17 +175,20 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) { { // Type-1 packet. // Contains two registers of data. Type-0 should be more common. - XELOGGPU("Packet(%.8X): set registers:", packet); + XELOGGPU("[%.8X] Packet(%.8X): set registers:", + ptr + n * 4, packet); uint32_t reg_index_1 = packet & 0x7FF; uint32_t reg_index_2 = (packet >> 11) & 0x7FF; uint32_t reg_data_1 = XEGETUINT32BE(packet_base + 1 * 4); uint32_t reg_data_2 = XEGETUINT32BE(packet_base + 2 * 4); const char* reg_name_1 = xenos::GetRegisterName(reg_index_1); const char* reg_name_2 = xenos::GetRegisterName(reg_index_2); - XELOGGPU(" %.8X -> %.4X %s", reg_data_1, reg_index_1, - reg_name_1 ? reg_name_1 : ""); - XELOGGPU(" %.8X -> %.4X %s", reg_data_2, reg_index_2, - reg_name_2 ? reg_name_2 : ""); + XELOGGPU("[%.8X] %.8X -> %.4X %s", + ptr + (n + 1) * 4, + reg_data_1, reg_index_1, reg_name_1 ? reg_name_1 : ""); + XELOGGPU("[%.8X] %.8X -> %.4X %s", + ptr + (n + 2) * 4, + reg_data_2, reg_index_2, reg_name_2 ? reg_name_2 : ""); // TODO(benvanik): exec write handler (if special). if (reg_index_1 < kXEGpuRegisterCount) { regs->values[reg_index_1].u32 = reg_data_1; @@ -195,14 +215,16 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) { switch (opcode) { case PM4_ME_INIT: // initialize CP's micro-engine - XELOGGPU("Packet(%.8X): PM4_ME_INIT", packet); + XELOGGPU("[%.8X] Packet(%.8X): PM4_ME_INIT", + ptr + n * 4, packet); LOG_DATA(count); break; case PM4_NOP: // skip N 32-bit words to get to the next packet // No-op, ignore some data. 
- XELOGGPU("Packet(%.8X): PM4_NOP", packet); + XELOGGPU("[%.8X] Packet(%.8X): PM4_NOP", + ptr + n * 4, packet); LOG_DATA(count); break; @@ -211,8 +233,8 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) { { uint32_t list_ptr = XEGETUINT32BE(packet_base + 1 * 4); uint32_t list_length = XEGETUINT32BE(packet_base + 2 * 4); - XELOGGPU("Packet(%.8X): PM4_INDIRECT_BUFFER %.8X (%dw)", - packet, list_ptr, list_length); + XELOGGPU("[%.8X] Packet(%.8X): PM4_INDIRECT_BUFFER %.8X (%dw)", + ptr + n * 4, packet, list_ptr, list_length); ExecuteSegment(list_ptr, list_length); driver_->set_address_translation(primary_buffer_ptr_ & ~0x1FFFFFFF); } @@ -220,32 +242,37 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) { case PM4_WAIT_REG_MEM: // wait until a register or memory location is a specific value - XELOGGPU("Packet(%.8X): PM4_WAIT_REG_MEM", packet); + XELOGGPU("[%.8X] Packet(%.8X): PM4_WAIT_REG_MEM", + ptr + n * 4, packet); LOG_DATA(count); break; case PM4_REG_RMW: // register read/modify/write // ? (used during shader upload and edram setup) - XELOGGPU("Packet(%.8X): PM4_REG_RMW", packet); + XELOGGPU("[%.8X] Packet(%.8X): PM4_REG_RMW", + ptr + n * 4, packet); LOG_DATA(count); break; case PM4_COND_WRITE: // conditional write to memory or register - XELOGGPU("Packet(%.8X): PM4_COND_WRITE", packet); + XELOGGPU("[%.8X] Packet(%.8X): PM4_COND_WRITE", + ptr + n * 4, packet); LOG_DATA(count); break; case PM4_EVENT_WRITE: // generate an event that creates a write to memory when completed - XELOGGPU("Packet(%.8X): PM4_EVENT_WRITE", packet); + XELOGGPU("[%.8X] Packet(%.8X): PM4_EVENT_WRITE", + ptr + n * 4, packet); LOG_DATA(count); break; case PM4_EVENT_WRITE_SHD: // generate a VS|PS_done event { - XELOGGPU("Packet(%.8X): PM4_EVENT_WRITE_SHD", packet); + XELOGGPU("[%.8X] Packet(%.8X): PM4_EVENT_WRITE_SHD", + ptr + n * 4, packet); LOG_DATA(count); // 3? 
uint32_t d0 = XEGETUINT32BE(packet_base + 1 * 4); @@ -260,7 +287,8 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) { case PM4_DRAW_INDX: // initiate fetch of index buffer and draw { - XELOGGPU("Packet(%.8X): PM4_DRAW_INDX", packet); + XELOGGPU("[%.8X] Packet(%.8X): PM4_DRAW_INDX", + ptr + n * 4, packet); LOG_DATA(count); // d0 = viz query info uint32_t d0 = XEGETUINT32BE(packet_base + 1 * 4); @@ -277,7 +305,8 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) { case PM4_DRAW_INDX_2: // draw using supplied indices in packet { - XELOGGPU("Packet(%.8X): PM4_DRAW_INDX_2", packet); + XELOGGPU("[%.8X] Packet(%.8X): PM4_DRAW_INDX_2", + ptr + n * 4, packet); LOG_DATA(count); uint32_t d0 = XEGETUINT32BE(packet_base + 1 * 4); uint32_t index_count = d0 >> 16; @@ -293,7 +322,8 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) { case PM4_IM_LOAD: // load sequencer instruction memory (pointer-based) { - XELOGGPU("Packet(%.8X): PM4_IM_LOAD", packet); + XELOGGPU("[%.8X] Packet(%.8X): PM4_IM_LOAD", + ptr + n * 4, packet); LOG_DATA(count); uint32_t addr_type = XEGETUINT32BE(packet_base + 1 * 4); uint32_t type = addr_type & 0x3; @@ -312,7 +342,8 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) { case PM4_IM_LOAD_IMMEDIATE: // load sequencer instruction memory (code embedded in packet) { - XELOGGPU("Packet(%.8X): PM4_IM_LOAD_IMMEDIATE", packet); + XELOGGPU("[%.8X] Packet(%.8X): PM4_IM_LOAD_IMMEDIATE", + ptr + n * 4, packet); uint32_t type = XEGETUINT32BE(packet_base + 1 * 4); uint32_t start_size = XEGETUINT32BE(packet_base + 2 * 4); uint32_t start = start_size >> 16; @@ -330,7 +361,8 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) { case PM4_INVALIDATE_STATE: // selective invalidation of state pointers { - XELOGGPU("Packet(%.8X): PM4_INVALIDATE_STATE", packet); + XELOGGPU("[%.8X] Packet(%.8X): PM4_INVALIDATE_STATE", + ptr + n * 4, packet); LOG_DATA(count); uint32_t mask = 
XEGETUINT32BE(packet_base + 1 * 4); driver_->InvalidateState(mask); @@ -338,7 +370,8 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) { break; default: - XELOGGPU("Packet(%.8X): unknown!", packet); + XELOGGPU("[%.8X] Packet(%.8X): unknown!", + ptr + n * 4, packet); LOG_DATA(count); break; }