Fixing ring buffer wrap around. I think.

This commit is contained in:
Ben Vanik 2013-10-13 13:39:46 -07:00
parent e0af361d74
commit 4887234bba
1 changed file with 62 additions and 29 deletions

View File

@ -41,7 +41,10 @@ void RingBufferWorker::Initialize(GraphicsDriver* driver,
uint32_t ptr, uint32_t page_count) { uint32_t ptr, uint32_t page_count) {
driver_ = driver; driver_ = driver;
primary_buffer_ptr_ = ptr; primary_buffer_ptr_ = ptr;
primary_buffer_size_ = page_count * 4 * 1024; // Not sure this is correct, but it's a way to take the page_count back to
// the number of bytes allocated by the physical alloc.
uint32_t original_size = 1 << (0x1C - page_count - 1);
primary_buffer_size_ = original_size;
read_ptr_index_ = 0; read_ptr_index_ = 0;
} }
@ -86,9 +89,19 @@ void RingBufferWorker::Pump() {
// Process the new commands. // Process the new commands.
XELOGGPU("Ring buffer thread work"); XELOGGPU("Ring buffer thread work");
// TODO(benvanik): handle wrapping around // Handle wrapping around.
// read_ptr_index_ = (read_ptr_index_ + 1) % (primary_buffer_size_ / 4); // TODO(benvanik): verify this is correct.
XEASSERT(write_ptr_index > read_ptr_index_); if (write_ptr_index_ < read_ptr_index_) {
// We wrapped. Execute all instructions until the end and go back to 0.
XELOGGPU("Ring buffer wrapped back to zero (read %0.8X, write %0.8X)",
read_ptr_index_, write_ptr_index);
uint32_t pre_length = (primary_buffer_size_ / 4) - read_ptr_index_;
if (pre_length) {
ExecuteSegment(primary_buffer_ptr_ + read_ptr_index_ * 4, pre_length);
}
read_ptr_index_ = 0;
}
uint32_t length = write_ptr_index - read_ptr_index_; uint32_t length = write_ptr_index - read_ptr_index_;
if (length) { if (length) {
ExecuteSegment(primary_buffer_ptr_ + read_ptr_index_ * 4, length); ExecuteSegment(primary_buffer_ptr_ + read_ptr_index_ * 4, length);
@ -114,12 +127,14 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) {
#define LOG_DATA(count) \ #define LOG_DATA(count) \
for (uint32_t __m = 0; __m < count; __m++) { \ for (uint32_t __m = 0; __m < count; __m++) { \
XELOGGPU(" %.8X", XEGETUINT32BE(packet_base + 1 * 4 + __m * 4)); \ XELOGGPU("[%.8X] %.8X", \
ptr + (n + 1 + __m) * 4, \
XEGETUINT32BE(packet_base + 1 * 4 + __m * 4)); \
} }
#define TRANSLATE_ADDR(p) \ #define TRANSLATE_ADDR(p) \
((p & ~0x3) + (primary_buffer_ptr_ & ~0x1FFFFFFF)) ((p & ~0x3) + (primary_buffer_ptr_ & ~0x1FFFFFFF))
XELOGGPU("CommandList(%.8X): executing %dw", ptr, length); XELOGGPU("[%.8X] CommandList(): executing %dw", ptr, length);
// Execute commands! // Execute commands!
for (uint32_t n = 0; n < length;) { for (uint32_t n = 0; n < length;) {
@ -138,14 +153,16 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) {
// Type-0 packet. // Type-0 packet.
// Write count registers in sequence to the registers starting at // Write count registers in sequence to the registers starting at
// (base_index << 2). // (base_index << 2).
XELOGGPU("Packet(%.8X): set registers:", packet); XELOGGPU("[%.8X] Packet(%.8X): set registers:",
ptr + n * 4, packet);
uint32_t count = ((packet >> 16) & 0x3FFF) + 1; uint32_t count = ((packet >> 16) & 0x3FFF) + 1;
uint32_t base_index = (packet & 0xFFFF); uint32_t base_index = (packet & 0xFFFF);
for (uint32_t m = 0; m < count; m++) { for (uint32_t m = 0; m < count; m++) {
uint32_t reg_data = XEGETUINT32BE(packet_base + 1 * 4 + m * 4); uint32_t reg_data = XEGETUINT32BE(packet_base + 1 * 4 + m * 4);
const char* reg_name = xenos::GetRegisterName(base_index + m); const char* reg_name = xenos::GetRegisterName(base_index + m);
XELOGGPU(" %.8X -> %.4X %s", reg_data, base_index + m, XELOGGPU("[%.8X] %.8X -> %.4X %s",
reg_name ? reg_name : ""); ptr + (n + 1 + m) * 4,
reg_data, base_index + m, reg_name ? reg_name : "");
// TODO(benvanik): exec write handler (if special). // TODO(benvanik): exec write handler (if special).
if (base_index + m < kXEGpuRegisterCount) { if (base_index + m < kXEGpuRegisterCount) {
regs->values[base_index + m].u32 = reg_data; regs->values[base_index + m].u32 = reg_data;
@ -158,17 +175,20 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) {
{ {
// Type-1 packet. // Type-1 packet.
// Contains two registers of data. Type-0 should be more common. // Contains two registers of data. Type-0 should be more common.
XELOGGPU("Packet(%.8X): set registers:", packet); XELOGGPU("[%.8X] Packet(%.8X): set registers:",
ptr + n * 4, packet);
uint32_t reg_index_1 = packet & 0x7FF; uint32_t reg_index_1 = packet & 0x7FF;
uint32_t reg_index_2 = (packet >> 11) & 0x7FF; uint32_t reg_index_2 = (packet >> 11) & 0x7FF;
uint32_t reg_data_1 = XEGETUINT32BE(packet_base + 1 * 4); uint32_t reg_data_1 = XEGETUINT32BE(packet_base + 1 * 4);
uint32_t reg_data_2 = XEGETUINT32BE(packet_base + 2 * 4); uint32_t reg_data_2 = XEGETUINT32BE(packet_base + 2 * 4);
const char* reg_name_1 = xenos::GetRegisterName(reg_index_1); const char* reg_name_1 = xenos::GetRegisterName(reg_index_1);
const char* reg_name_2 = xenos::GetRegisterName(reg_index_2); const char* reg_name_2 = xenos::GetRegisterName(reg_index_2);
XELOGGPU(" %.8X -> %.4X %s", reg_data_1, reg_index_1, XELOGGPU("[%.8X] %.8X -> %.4X %s",
reg_name_1 ? reg_name_1 : ""); ptr + (n + 1) * 4,
XELOGGPU(" %.8X -> %.4X %s", reg_data_2, reg_index_2, reg_data_1, reg_index_1, reg_name_1 ? reg_name_1 : "");
reg_name_2 ? reg_name_2 : ""); XELOGGPU("[%.8X] %.8X -> %.4X %s",
ptr + (n + 2) * 4,
reg_data_2, reg_index_2, reg_name_2 ? reg_name_2 : "");
// TODO(benvanik): exec write handler (if special). // TODO(benvanik): exec write handler (if special).
if (reg_index_1 < kXEGpuRegisterCount) { if (reg_index_1 < kXEGpuRegisterCount) {
regs->values[reg_index_1].u32 = reg_data_1; regs->values[reg_index_1].u32 = reg_data_1;
@ -195,14 +215,16 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) {
switch (opcode) { switch (opcode) {
case PM4_ME_INIT: case PM4_ME_INIT:
// initialize CP's micro-engine // initialize CP's micro-engine
XELOGGPU("Packet(%.8X): PM4_ME_INIT", packet); XELOGGPU("[%.8X] Packet(%.8X): PM4_ME_INIT",
ptr + n * 4, packet);
LOG_DATA(count); LOG_DATA(count);
break; break;
case PM4_NOP: case PM4_NOP:
// skip N 32-bit words to get to the next packet // skip N 32-bit words to get to the next packet
// No-op, ignore some data. // No-op, ignore some data.
XELOGGPU("Packet(%.8X): PM4_NOP", packet); XELOGGPU("[%.8X] Packet(%.8X): PM4_NOP",
ptr + n * 4, packet);
LOG_DATA(count); LOG_DATA(count);
break; break;
@ -211,8 +233,8 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) {
{ {
uint32_t list_ptr = XEGETUINT32BE(packet_base + 1 * 4); uint32_t list_ptr = XEGETUINT32BE(packet_base + 1 * 4);
uint32_t list_length = XEGETUINT32BE(packet_base + 2 * 4); uint32_t list_length = XEGETUINT32BE(packet_base + 2 * 4);
XELOGGPU("Packet(%.8X): PM4_INDIRECT_BUFFER %.8X (%dw)", XELOGGPU("[%.8X] Packet(%.8X): PM4_INDIRECT_BUFFER %.8X (%dw)",
packet, list_ptr, list_length); ptr + n * 4, packet, list_ptr, list_length);
ExecuteSegment(list_ptr, list_length); ExecuteSegment(list_ptr, list_length);
driver_->set_address_translation(primary_buffer_ptr_ & ~0x1FFFFFFF); driver_->set_address_translation(primary_buffer_ptr_ & ~0x1FFFFFFF);
} }
@ -220,32 +242,37 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) {
case PM4_WAIT_REG_MEM: case PM4_WAIT_REG_MEM:
// wait until a register or memory location is a specific value // wait until a register or memory location is a specific value
XELOGGPU("Packet(%.8X): PM4_WAIT_REG_MEM", packet); XELOGGPU("[%.8X] Packet(%.8X): PM4_WAIT_REG_MEM",
ptr + n * 4, packet);
LOG_DATA(count); LOG_DATA(count);
break; break;
case PM4_REG_RMW: case PM4_REG_RMW:
// register read/modify/write // register read/modify/write
// ? (used during shader upload and edram setup) // ? (used during shader upload and edram setup)
XELOGGPU("Packet(%.8X): PM4_REG_RMW", packet); XELOGGPU("[%.8X] Packet(%.8X): PM4_REG_RMW",
ptr + n * 4, packet);
LOG_DATA(count); LOG_DATA(count);
break; break;
case PM4_COND_WRITE: case PM4_COND_WRITE:
// conditional write to memory or register // conditional write to memory or register
XELOGGPU("Packet(%.8X): PM4_COND_WRITE", packet); XELOGGPU("[%.8X] Packet(%.8X): PM4_COND_WRITE",
ptr + n * 4, packet);
LOG_DATA(count); LOG_DATA(count);
break; break;
case PM4_EVENT_WRITE: case PM4_EVENT_WRITE:
// generate an event that creates a write to memory when completed // generate an event that creates a write to memory when completed
XELOGGPU("Packet(%.8X): PM4_EVENT_WRITE", packet); XELOGGPU("[%.8X] Packet(%.8X): PM4_EVENT_WRITE",
ptr + n * 4, packet);
LOG_DATA(count); LOG_DATA(count);
break; break;
case PM4_EVENT_WRITE_SHD: case PM4_EVENT_WRITE_SHD:
// generate a VS|PS_done event // generate a VS|PS_done event
{ {
XELOGGPU("Packet(%.8X): PM4_EVENT_WRITE_SHD", packet); XELOGGPU("[%.8X] Packet(%.8X): PM4_EVENT_WRITE_SHD",
ptr + n * 4, packet);
LOG_DATA(count); LOG_DATA(count);
// 3? // 3?
uint32_t d0 = XEGETUINT32BE(packet_base + 1 * 4); uint32_t d0 = XEGETUINT32BE(packet_base + 1 * 4);
@ -260,7 +287,8 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) {
case PM4_DRAW_INDX: case PM4_DRAW_INDX:
// initiate fetch of index buffer and draw // initiate fetch of index buffer and draw
{ {
XELOGGPU("Packet(%.8X): PM4_DRAW_INDX", packet); XELOGGPU("[%.8X] Packet(%.8X): PM4_DRAW_INDX",
ptr + n * 4, packet);
LOG_DATA(count); LOG_DATA(count);
// d0 = viz query info // d0 = viz query info
uint32_t d0 = XEGETUINT32BE(packet_base + 1 * 4); uint32_t d0 = XEGETUINT32BE(packet_base + 1 * 4);
@ -277,7 +305,8 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) {
case PM4_DRAW_INDX_2: case PM4_DRAW_INDX_2:
// draw using supplied indices in packet // draw using supplied indices in packet
{ {
XELOGGPU("Packet(%.8X): PM4_DRAW_INDX_2", packet); XELOGGPU("[%.8X] Packet(%.8X): PM4_DRAW_INDX_2",
ptr + n * 4, packet);
LOG_DATA(count); LOG_DATA(count);
uint32_t d0 = XEGETUINT32BE(packet_base + 1 * 4); uint32_t d0 = XEGETUINT32BE(packet_base + 1 * 4);
uint32_t index_count = d0 >> 16; uint32_t index_count = d0 >> 16;
@ -293,7 +322,8 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) {
case PM4_IM_LOAD: case PM4_IM_LOAD:
// load sequencer instruction memory (pointer-based) // load sequencer instruction memory (pointer-based)
{ {
XELOGGPU("Packet(%.8X): PM4_IM_LOAD", packet); XELOGGPU("[%.8X] Packet(%.8X): PM4_IM_LOAD",
ptr + n * 4, packet);
LOG_DATA(count); LOG_DATA(count);
uint32_t addr_type = XEGETUINT32BE(packet_base + 1 * 4); uint32_t addr_type = XEGETUINT32BE(packet_base + 1 * 4);
uint32_t type = addr_type & 0x3; uint32_t type = addr_type & 0x3;
@ -312,7 +342,8 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) {
case PM4_IM_LOAD_IMMEDIATE: case PM4_IM_LOAD_IMMEDIATE:
// load sequencer instruction memory (code embedded in packet) // load sequencer instruction memory (code embedded in packet)
{ {
XELOGGPU("Packet(%.8X): PM4_IM_LOAD_IMMEDIATE", packet); XELOGGPU("[%.8X] Packet(%.8X): PM4_IM_LOAD_IMMEDIATE",
ptr + n * 4, packet);
uint32_t type = XEGETUINT32BE(packet_base + 1 * 4); uint32_t type = XEGETUINT32BE(packet_base + 1 * 4);
uint32_t start_size = XEGETUINT32BE(packet_base + 2 * 4); uint32_t start_size = XEGETUINT32BE(packet_base + 2 * 4);
uint32_t start = start_size >> 16; uint32_t start = start_size >> 16;
@ -330,7 +361,8 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) {
case PM4_INVALIDATE_STATE: case PM4_INVALIDATE_STATE:
// selective invalidation of state pointers // selective invalidation of state pointers
{ {
XELOGGPU("Packet(%.8X): PM4_INVALIDATE_STATE", packet); XELOGGPU("[%.8X] Packet(%.8X): PM4_INVALIDATE_STATE",
ptr + n * 4, packet);
LOG_DATA(count); LOG_DATA(count);
uint32_t mask = XEGETUINT32BE(packet_base + 1 * 4); uint32_t mask = XEGETUINT32BE(packet_base + 1 * 4);
driver_->InvalidateState(mask); driver_->InvalidateState(mask);
@ -338,7 +370,8 @@ void RingBufferWorker::ExecuteSegment(uint32_t ptr, uint32_t length) {
break; break;
default: default:
XELOGGPU("Packet(%.8X): unknown!", packet); XELOGGPU("[%.8X] Packet(%.8X): unknown!",
ptr + n * 4, packet);
LOG_DATA(count); LOG_DATA(count);
break; break;
} }